diff --git a/gklearn/preimage/find_best_k.py b/gklearn/preimage/find_best_k.py
deleted file mode 100644
index df38d32..0000000
--- a/gklearn/preimage/find_best_k.py
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jan 9 11:54:32 2020
-
-@author: ljia
-"""
-import numpy as np
-import random
-import csv
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs
-
-def find_best_k():
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
- gkernel = 'treeletkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- ds_name = 'mono'
- dir_output = 'results/test_find_best_k/'
-
- repeats = 50
- k_list = range(2, 11)
- fit_method = 'k-graphs'
- # fitted on the whole dataset - treelet - mono
- edit_costs = [0.1268873773592978, 0.004084633224249829, 0.0897581955378986, 0.15328856114451297, 0.3109956881625734, 0.0]
-
- # create result files.
- fn_output_detail = 'results_detail.' + fit_method + '.csv'
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'repeat', 'median set', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM'])
- f_detail.close()
- fn_output_summary = 'results_summary.csv'
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
- '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
- 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
- 'repeats better dis_k gi -> GM'])
- f_summary.close()
-
- random.seed(1)
- rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-
- for k in k_list:
- print('\n--------- k =', k, '----------')
-
- sod_sm_list = []
- sod_gm_list = []
- dis_k_sm_list = []
- dis_k_gm_list = []
- dis_k_gi_min_list = []
- nb_sod_sm2gm = [0, 0, 0]
- nb_dis_k_sm2gm = [0, 0, 0]
- nb_dis_k_gi2sm = [0, 0, 0]
- nb_dis_k_gi2gm = [0, 0, 0]
- repeats_better_sod_sm2gm = []
- repeats_better_dis_k_sm2gm = []
- repeats_better_dis_k_gi2sm = []
- repeats_better_dis_k_gi2gm = []
-
-
- for repeat in range(repeats):
- print('\nrepeat =', repeat)
- random.seed(rdn_seed_list[repeat])
- median_set_idx = random.sample(range(0, len(Gn)), k)
- print('median set: ', median_set_idx)
-
- sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
- = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
- fit_method='k-graphs',
- edit_costs=edit_costs,
- group_min=median_set_idx,
- parallel=False)
-
- # write result detail.
- sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
- dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
- dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
- dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, repeat,
- median_set_idx, sod_sm, sod_gm, dis_k_sm, dis_k_gm,
- dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
- dis_k_gi2gm])
- f_detail.close()
-
- # compute result summary.
- sod_sm_list.append(sod_sm)
- sod_gm_list.append(sod_gm)
- dis_k_sm_list.append(dis_k_sm)
- dis_k_gm_list.append(dis_k_gm)
- dis_k_gi_min_list.append(dis_k_gi_min)
- # # SOD SM -> GM
- if sod_sm > sod_gm:
- nb_sod_sm2gm[0] += 1
- repeats_better_sod_sm2gm.append(repeat)
- elif sod_sm == sod_gm:
- nb_sod_sm2gm[1] += 1
- elif sod_sm < sod_gm:
- nb_sod_sm2gm[2] += 1
- # # dis_k SM -> GM
- if dis_k_sm > dis_k_gm:
- nb_dis_k_sm2gm[0] += 1
- repeats_better_dis_k_sm2gm.append(repeat)
- elif dis_k_sm == dis_k_gm:
- nb_dis_k_sm2gm[1] += 1
- elif dis_k_sm < dis_k_gm:
- nb_dis_k_sm2gm[2] += 1
- # # dis_k gi -> SM
- if dis_k_gi_min > dis_k_sm:
- nb_dis_k_gi2sm[0] += 1
- repeats_better_dis_k_gi2sm.append(repeat)
- elif dis_k_gi_min == dis_k_sm:
- nb_dis_k_gi2sm[1] += 1
- elif dis_k_gi_min < dis_k_sm:
- nb_dis_k_gi2sm[2] += 1
- # # dis_k gi -> GM
- if dis_k_gi_min > dis_k_gm:
- nb_dis_k_gi2gm[0] += 1
- repeats_better_dis_k_gi2gm.append(repeat)
- elif dis_k_gi_min == dis_k_gm:
- nb_dis_k_gi2gm[1] += 1
- elif dis_k_gi_min < dis_k_gm:
- nb_dis_k_gi2gm[2] += 1
-
- # write result summary.
- sod_sm_mean = np.mean(sod_sm_list)
- sod_gm_mean = np.mean(sod_gm_list)
- dis_k_sm_mean = np.mean(dis_k_sm_list)
- dis_k_gm_mean = np.mean(dis_k_gm_list)
- dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k,
- sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
- dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
- nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
- repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
- repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
- f_summary.close()
-
- print('\ncomplete.')
- return
-
-
-def getRelations(sign):
- if sign == -1:
- return 'better'
- elif sign == 0:
- return 'same'
- elif sign == 1:
- return 'worse'
-
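# Editor's sketch (illustration, not part of the deleted file): how
# getRelations, defined below, is used above. np.sign of a difference
# yields -1/0/1, which maps to a qualitative relation between two SODs
# or distances (a smaller new value means an improvement).
import numpy as np
assert getRelations(np.sign(3.2 - 4.1)) == 'better'  # new value shrank
assert getRelations(np.sign(4.1 - 4.1)) == 'same'
assert getRelations(np.sign(5.0 - 4.1)) == 'worse'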
-
-if __name__ == '__main__':
- find_best_k()
\ No newline at end of file
diff --git a/gklearn/preimage/fitDistance.py b/gklearn/preimage/fitDistance.py
deleted file mode 100644
index 234f7fc..0000000
--- a/gklearn/preimage/fitDistance.py
+++ /dev/null
@@ -1,430 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Oct 16 14:20:06 2019
-
-@author: ljia
-"""
-import numpy as np
-from tqdm import tqdm
-from itertools import combinations_with_replacement, combinations
-import multiprocessing
-from multiprocessing import Pool
-from functools import partial
-import time
-import random
-import sys
-
-from scipy import optimize
-from scipy.optimize import minimize
-import cvxpy as cp
-
-from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
-from gklearn.preimage.utils import kernel_distance_matrix
-
-def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
- params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT',
- 'method': 'IPFP', 'stabilizer': None},
- init_costs=[3, 3, 1, 3, 3, 1],
- dataset='monoterpenoides', Kmatrix=None,
- parallel=True):
-# dataset = dataset.lower()
-
- # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
-# random.seed(1)
-# cost_rdm = random.sample(range(1, 10), 6)
-# init_costs = cost_rdm + [0]
-# init_costs = cost_rdm
-# init_costs = [3, 3, 1, 3, 3, 1]
-# init_costs = [i * 0.01 for i in cost_rdm] + [0]
-# init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
-# init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
-# init_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0]
-# idx_cost_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
-
- # compute distances in feature space.
- dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
- Kmatrix=Kmatrix, gkernel=gkernel)
- dis_k_vec = []
- for i in range(len(dis_k_mat)):
-# for j in range(i, len(dis_k_mat)):
- for j in range(i + 1, len(dis_k_mat)):
- dis_k_vec.append(dis_k_mat[i, j])
- dis_k_vec = np.array(dis_k_vec)
-
- # init ged.
- print('\ninitial:')
- time0 = time.time()
- params_ged['dataset'] = dataset
- params_ged['edit_cost_constant'] = init_costs
- ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
- parallel=parallel)
- residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
- time_list = [time.time() - time0]
- edit_cost_list = [init_costs]
- nb_cost_mat = np.array(n_edit_operations)
- nb_cost_mat_list = [nb_cost_mat]
- print('edit_costs:', init_costs)
- print('residual_list:', residual_list)
-
- for itr in range(itr_max):
- print('\niteration', itr)
- time0 = time.time()
- # "fit" GEDs to distances in feature space by tuning edit costs using
- # least squares.
- np.savez('results/xp_fit_method/fit_data_debug' + str(itr) + '.gm',
- nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec,
- n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
- ged_mat=ged_mat)
- edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec,
- dataset=dataset, cost=params_ged['cost'])
- for i in range(len(edit_costs_new)):
- if -1e-9 <= edit_costs_new[i] <= 1e-9:
- edit_costs_new[i] = 0
- if edit_costs_new[i] < 0:
- raise ValueError('The edit cost is negative.')
-# for i in range(len(edit_costs_new)):
-# if edit_costs_new[i] < 0:
-# edit_costs_new[i] = 0
-
- # compute new GEDs and numbers of edit operations.
- params_ged['edit_cost_constant'] = edit_costs_new # np.array([edit_costs_new[0], edit_costs_new[1], 0.75])
- ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
- parallel=parallel)
- residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
- time_list.append(time.time() - time0)
- edit_cost_list.append(edit_costs_new)
- nb_cost_mat = np.array(n_edit_operations)
- nb_cost_mat_list.append(nb_cost_mat)
- print('edit_costs:', edit_costs_new)
- print('residual_list:', residual_list)
-
- return edit_costs_new, residual_list, edit_cost_list, dis_k_mat, ged_mat, \
- time_list, nb_cost_mat_list
-
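# Editor's sketch (illustration, not part of the deleted file): each pass of
# the fitting loop above solves a non-negative least-squares problem
#     min_w || N w - d ||_2  s.t.  w >= 0,
# where row p of N counts the edit operations used for graph pair p and d is
# the kernel-distance vector. A minimal standalone version using scipy:
import numpy as np
from scipy.optimize import nnls

def fit_costs_nnls(nb_cost_mat, dis_k_vec):
    """Return non-negative edit costs w minimizing ||nb_cost_mat @ w - dis_k_vec||."""
    w, res = nnls(np.asarray(nb_cost_mat), np.asarray(dis_k_vec))
    return w, res

# e.g. with synthetic counts, the true costs are recovered:
# N = np.random.rand(20, 6)
# d = N @ np.array([3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
# fit_costs_nnls(N, d)[0]  # ~ [3, 3, 1, 3, 3, 1]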
-
-def compute_geds(Gn, params_ged, parallel=False):
- edit_cost_name = params_ged['cost']
- if edit_cost_name == 'LETTER' or edit_cost_name == 'LETTER2':
- get_nb_eo = get_nb_edit_operations_letter
- elif edit_cost_name == 'NON_SYMBOLIC':
- get_nb_eo = get_nb_edit_operations_nonsymbolic
- else:
- get_nb_eo = get_nb_edit_operations
- ged_mat = np.zeros((len(Gn), len(Gn)))
- if parallel:
-# print('parallel')
-# len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
- len_itr = int(len(Gn) * (len(Gn) - 1) / 2)
- ged_vec = [0 for i in range(len_itr)]
- n_edit_operations = [0 for i in range(len_itr)]
-# itr = combinations_with_replacement(range(0, len(Gn)), 2)
- itr = combinations(range(0, len(Gn)), 2)
- n_jobs = multiprocessing.cpu_count()
- if len_itr < 100 * n_jobs:
- chunksize = int(len_itr / n_jobs) + 1
- else:
- chunksize = 100
- def init_worker(gn_toshare):
- global G_gn
- G_gn = gn_toshare
- do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo)
- pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,))
- iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
- desc='computing GEDs', file=sys.stdout)
-# iterator = pool.imap_unordered(do_partial, itr, chunksize)
- for i, j, dis, n_eo_tmp in iterator:
- idx_itr = int(len(Gn) * i + j - (i + 1) * (i + 2) / 2)
- ged_vec[idx_itr] = dis
- ged_mat[i][j] = dis
- ged_mat[j][i] = dis
- n_edit_operations[idx_itr] = n_eo_tmp
-# print('\n-------------------------------------------')
-# print(i, j, idx_itr, dis)
- pool.close()
- pool.join()
-
- else:
- ged_vec = []
- n_edit_operations = []
- for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
-# for i in range(len(Gn)):
- for j in range(i + 1, len(Gn)):
- dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], **params_ged)
- ged_vec.append(dis)
- ged_mat[i][j] = dis
- ged_mat[j][i] = dis
- n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward)
- n_edit_operations.append(n_eo_tmp)
-
- return ged_vec, ged_mat, n_edit_operations
-
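# Editor's self-check (illustration, not part of the deleted file): the
# parallel branch above maps a pair (i, j), i < j, to its position in the
# condensed ordering produced by itertools.combinations via
#     idx = n*i + j - (i + 1)*(i + 2)/2.
# A quick verification of that formula:
from itertools import combinations

n = 5
for idx_ref, (i, j) in enumerate(combinations(range(n), 2)):
    assert int(n * i + j - (i + 1) * (i + 2) / 2) == idx_ref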
-
-def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr):
- i = itr[0]
- j = itr[1]
- dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged, get_nb_eo)
- return i, j, dis, n_eo_tmp
-
-
-def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo):
- dis, pi_forward, pi_backward = GED(g1, g2, **params_ged)
- n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward) # [0,0,0,0,0,0]
- return dis, n_eo_tmp
-
-
-def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides',
- cost='CONSTANT', rw_constraints='inequality'):
-# if dataset == 'Letter-high':
- if cost == 'LETTER':
- pass
-# # method 1: set alpha automatically, just tune c_vir and c_eir by
-# # LMS using cvxpy.
-# alpha = 0.5
-# coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
-## if np.count_nonzero(nb_cost_mat[:,4]) == 0:
-## alpha = 0.75
-## else:
-## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
-## alpha = alpha * 0.99
-# param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
-# param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
-# nb_cost_mat_new = np.column_stack((param_vir, param_eir))
-# dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
-#
-# x = cp.Variable(nb_cost_mat_new.shape[1])
-# cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
-# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-# prob = cp.Problem(cp.Minimize(cost), constraints)
-# prob.solve()
-# edit_costs_new = x.value
-# edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
-# residual = np.sqrt(prob.value)
-
-# # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
-# # scipy.optimize.minimize.
-# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
-# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
-# w2 = nb_cost_mat[:,3]
-# w3 = dis_k_vec
-# func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
-# + w2 * x[2] - w3 * x[3]) ** 2)
-# bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
-# res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
-# edit_costs_new = res.x[0:3]
-# residual = res.fun
-
- # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.
-
-
-# # method 4: tune c_vir, c_eir and alpha by QP function
-# # scipy.optimize.least_squares. An initial guess is required.
-# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
-# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
-# w2 = nb_cost_mat[:,3]
-# w3 = dis_k_vec
-# func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
-# + w2 * x[2] - w3 * x[3]) ** 2
-# res = optimize.root(func, [0.9, 1.7, 0.75, 100])
-# edit_costs_new = res.x
-# residual = None
- elif cost == 'LETTER2':
-# # 1. if c_vi != c_vr, c_ei != c_er.
-# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-# x = cp.Variable(nb_cost_mat_new.shape[1])
-# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-## # 1.1 no constraints.
-## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-# # 1.2 c_vs <= c_vi + c_vr.
-# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-## # 2. if c_vi == c_vr, c_ei == c_er.
-## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
-## nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
-## nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
-## x = cp.Variable(nb_cost_mat_new.shape[1])
-## cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-## # 2.1 no constraints.
-## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-### # 2.2 c_vs <= c_vi + c_vr.
-### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-### np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
-#
-# prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-# prob.solve()
-# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
-# edit_costs_new = np.array(edit_costs_new)
-# residual = np.sqrt(prob.value)
- if rw_constraints == 'inequality':
- # c_vs <= c_vi + c_vr.
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
- np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- try:
- prob.solve(verbose=True)
- except MemoryError as error0:
- print('\nUsing solver "OSQP" caused a memory error.')
- print('The original error message is\n', error0)
- print('solver status: ', prob.status)
- print('trying solver "CVXOPT" instead...\n')
- try:
- prob.solve(solver=cp.CVXOPT, verbose=True)
- except Exception as error1:
- print('\nAn error occurred when using solver "CVXOPT".')
- print('The original error message is\n', error1)
- print('solver status: ', prob.status)
- print('trying solver "MOSEK" instead. Note that this solver is commercial and requires a license.\n')
- prob.solve(solver=cp.MOSEK, verbose=True)
- else:
- print('solver status: ', prob.status)
- else:
- print('solver status: ', prob.status)
- print()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
- elif rw_constraints == '2constraints':
- # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
- np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
- np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
- np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
- elif rw_constraints == 'no-constraint':
- # no constraint.
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
-# elif method == 'inequality_modified':
-# # c_vs <= c_vi + c_vr.
-# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-# x = cp.Variable(nb_cost_mat_new.shape[1])
-# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-# prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-# prob.solve()
-# # use same costs for insertion and removal rather than the fitted costs.
-# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
-# edit_costs_new = np.array(edit_costs_new)
-# residual = np.sqrt(prob.value)
- elif cost == 'NON_SYMBOLIC':
- is_n_attr = np.count_nonzero(nb_cost_mat[:,2])
- is_e_attr = np.count_nonzero(nb_cost_mat[:,5])
-
- if dataset == 'SYNTHETICnew':
-# nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
- nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-# np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
-# constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]]
- constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])],
- np.array([0.0, 1.0, -1.0]).T@x == 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
-# print(x.value)
- edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value,
- np.array([0.0])))
- residual = np.sqrt(prob.value)
-
- elif rw_constraints == 'inequality':
- # c_vs <= c_vi + c_vr.
- if is_n_attr and is_e_attr:
- nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
- np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
- np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
- elif is_n_attr and not is_e_attr:
- nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
- np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- print(x.value)
- edit_costs_new = np.concatenate((x.value, np.array([0.0])))
- residual = np.sqrt(prob.value)
- elif not is_n_attr and is_e_attr:
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
- np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
- residual = np.sqrt(prob.value)
- else:
- nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
- constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
- x.value[2:], np.array([0.0])))
- residual = np.sqrt(prob.value)
- else:
-# # method 1: simple least square method.
-# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
-# rcond=None)
-
-# # method 2: least square method with x_i >= 0.
-# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)
-
- # method 3: solve as a quadratic program with constraints.
-# P = np.dot(nb_cost_mat.T, nb_cost_mat)
-# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
-# G = -1 * np.identity(nb_cost_mat.shape[1])
-# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
-# A = np.array([1 for i in range(nb_cost_mat.shape[1])])
-# b = 1
-# x = cp.Variable(nb_cost_mat.shape[1])
-# prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
-# [G@x <= h])
-# prob.solve()
-# edit_costs_new = x.value
-# residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
-
-# G = -1 * np.identity(nb_cost_mat.shape[1])
-# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
- x = cp.Variable(nb_cost_mat.shape[1])
- cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
- constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
- # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
- np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
- np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- edit_costs_new = x.value
- residual = np.sqrt(prob.value)
-
- # method 4:
-
- return edit_costs_new, residual
-
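# Editor's sketch (illustration, not part of the deleted file): every branch
# of update_costs above builds the same kind of cvxpy program; here is a
# minimal standalone instance on synthetic data, with the triangle constraint
# c_vs <= c_vi + c_vr on the first three cost components:
import numpy as np
import cvxpy as cp

N = np.random.rand(20, 5)   # synthetic edit-operation counts
d = np.random.rand(20)      # synthetic kernel distances
x = cp.Variable(5)
prob = cp.Problem(cp.Minimize(cp.sum_squares(N @ x - d)),
                  [x >= 0.0,
                   np.array([1.0, 1.0, -1.0, 0.0, 0.0]) @ x >= 0.0])
prob.solve()
edit_costs, residual = x.value, np.sqrt(prob.value)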
-
-if __name__ == '__main__':
- print('check test_fitDistance.py')
\ No newline at end of file
diff --git a/gklearn/preimage/ged.py b/gklearn/preimage/ged.py
deleted file mode 100644
index a66baaf..0000000
--- a/gklearn/preimage/ged.py
+++ /dev/null
@@ -1,467 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 17 18:44:59 2019
-
-@author: ljia
-"""
-import numpy as np
-import networkx as nx
-from tqdm import tqdm
-import sys
-import multiprocessing
-from multiprocessing import Pool
-from functools import partial
-
-#from gedlibpy_linlin import librariesImport, gedlibpy
-from gklearn.gedlib import librariesImport, gedlibpy
-
-def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP',
- edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50):
- """
- Compute GED for 2 graphs.
- """
-
-# dataset = dataset.lower()
-
- if lib == 'gedlibpy':
- gedlibpy.restart_env()
- gedlibpy.add_nx_graph(convertGraph(g1, cost), "")
- gedlibpy.add_nx_graph(convertGraph(g2, cost), "")
-
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
- gedlibpy.init()
- gedlibpy.set_method(method, algo_options)
- gedlibpy.init_method()
-
- g = listID[0]
- h = listID[1]
- if stabilizer is None:
- gedlibpy.run_method(g, h)
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- upper = gedlibpy.get_upper_bound(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- elif stabilizer == 'mean':
- # @todo: to be finished...
- upper_list = [np.inf] * repeat
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_list[itr] = gedlibpy.get_upper_bound(g, h)
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- upper = np.mean(upper_list)
- elif stabilizer == 'median':
- if repeat % 2 == 0:
- repeat += 1
- upper_list = [np.inf] * repeat
- pi_forward_list = [0] * repeat
- pi_backward_list = [0] * repeat
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_list[itr] = gedlibpy.get_upper_bound(g, h)
- pi_forward_list[itr] = gedlibpy.get_forward_map(g, h)
- pi_backward_list[itr] = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- upper = np.median(upper_list)
- idx_median = upper_list.index(upper)
- pi_forward = pi_forward_list[idx_median]
- pi_backward = pi_backward_list[idx_median]
- elif stabilizer == 'min':
- upper = np.inf
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_tmp = gedlibpy.get_upper_bound(g, h)
- if upper_tmp < upper:
- upper = upper_tmp
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- if upper == 0:
- break
- elif stabilizer == 'max':
- upper = 0
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_tmp = gedlibpy.get_upper_bound(g, h)
- if upper_tmp > upper:
- upper = upper_tmp
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- elif stabilizer == 'gaussian':
- pass
-
- dis = upper
-
- elif lib == 'gedlib-bash':
- import time
- import random
- import os
- from gklearn.utils.graphfiles import saveDataset
-
- tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
- if not os.path.exists(tmp_dir):
- os.makedirs(tmp_dir)
- fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, int(1e9)))
- xparams = {'method': 'gedlib', 'graph_dir': fn_collection}
- saveDataset([g1, g2], ['dummy', 'dummy'], gformat='gxl', group='xml',
- filename=fn_collection, xparams=xparams)
-
- command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
- command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
- command += 'export LD_LIBRARY_PATH\n'
- command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
- command += './ged_for_python_bash monoterpenoides ' + fn_collection \
- + ' \'' + algo_options + '\' '
- for ec in edit_cost_constant:
- command += str(ec) + ' '
-# output = os.system(command)
- stream = os.popen(command)
- output = stream.readlines()
-# print(output)
-
- dis = float(output[0].strip())
- runtime = float(output[1].strip())
- size_forward = int(output[2].strip())
- pi_forward = [int(item.strip()) for item in output[3:3+size_forward]]
- pi_backward = [int(item.strip()) for item in output[3+size_forward:]]
-
-# print(dis)
-# print(runtime)
-# print(size_forward)
-# print(pi_forward)
-# print(pi_backward)
-
-
- # correct the mapping labels (map removed nodes to np.inf).
- nodes1 = [n for n in g1.nodes()]
- nodes2 = [n for n in g2.nodes()]
- nb1 = nx.number_of_nodes(g1)
- nb2 = nx.number_of_nodes(g2)
- pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
- pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
-# print(pi_forward)
-
-
- return dis, pi_forward, pi_backward
-
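# Editor's sketch (hypothetical usage, not part of the deleted file): calling
# GED on two small molecule-like graphs, assuming the gedlibpy bindings are
# built. Attribute names follow the 'atom'/'bond_type' convention expected by
# convertGraph below for the default 'CHEM_1' cost.
import networkx as nx
g1 = nx.Graph()
g1.add_node(0, atom='C'); g1.add_node(1, atom='O')
g1.add_edge(0, 1, bond_type='1')
g2 = nx.Graph()
g2.add_node(0, atom='C'); g2.add_node(1, atom='N')
g2.add_edge(0, 1, bond_type='1')
dis, pi_forward, pi_backward = GED(g1, g2, cost='CHEM_1', method='IPFP',
                                   stabilizer='min', repeat=10)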
-
-def convertGraph(G, cost):
- """Convert a graph to the NetworkX format recognized by the
- gedlibpy library.
- """
- G_new = nx.Graph()
- if cost == 'LETTER' or cost == 'LETTER2':
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
- y=str(attrs['attributes'][1]))
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2))
- elif cost == 'NON_SYMBOLIC':
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd))
- for a_name in G.graph['node_attrs']:
- G_new.nodes[str(nd)][a_name] = str(attrs[a_name])
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2))
- for a_name in G.graph['edge_attrs']:
- G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name])
- else:
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), chem=attrs['atom'])
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-# G_new.add_edge(str(nd1), str(nd2))
-
- return G_new
-
-
-def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
- edit_cost_constant=[], stabilizer='min', repeat=50):
- """
- Compute GEDs for a group of graphs.
- @todo: unfinished; g1 and g2 in the body are undefined, so this
- function cannot run as written.
- """
- if lib == 'gedlibpy':
- def convertGraph(G):
- """Convert a graph to the NetworkX format recognized by the
- gedlibpy library.
- """
- G_new = nx.Graph()
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), chem=attrs['atom'])
- for nd1, nd2, attrs in G.edges(data=True):
-# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
- G_new.add_edge(str(nd1), str(nd2))
-
- return G_new
-
- gedlibpy.restart_env()
- gedlibpy.add_nx_graph(convertGraph(g1), "")
- gedlibpy.add_nx_graph(convertGraph(g2), "")
-
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
- gedlibpy.init()
- gedlibpy.set_method(method, "")
- gedlibpy.init_method()
-
- g = listID[0]
- h = listID[1]
- if stabilizer is None:
- gedlibpy.run_method(g, h)
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- upper = gedlibpy.get_upper_bound(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- elif stabilizer == 'min':
- upper = np.inf
- for itr in range(repeat):
- gedlibpy.run_method(g, h)
- upper_tmp = gedlibpy.get_upper_bound(g, h)
- if upper_tmp < upper:
- upper = upper_tmp
- pi_forward = gedlibpy.get_forward_map(g, h)
- pi_backward = gedlibpy.get_backward_map(g, h)
- lower = gedlibpy.get_lower_bound(g, h)
- if upper == 0:
- break
-
- dis = upper
-
- # correct the mapping labels (map removed nodes to np.inf).
- nodes1 = [n for n in g1.nodes()]
- nodes2 = [n for n in g2.nodes()]
- nb1 = nx.number_of_nodes(g1)
- nb2 = nx.number_of_nodes(g2)
- pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
- pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
-
- return dis, pi_forward, pi_backward
-
-
-def ged_median(Gn, Gn_median, verbose=False, params_ged={'lib': 'gedlibpy',
- 'cost': 'CHEM_1', 'method': 'IPFP', 'edit_cost_constant': [],
- 'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1',
- 'stabilizer': None}, parallel=False):
- if parallel:
- len_itr = int(len(Gn))
- pi_forward_list = [[] for i in range(len_itr)]
- dis_list = [0 for i in range(len_itr)]
-
- itr = range(0, len_itr)
- n_jobs = multiprocessing.cpu_count()
- if len_itr < 100 * n_jobs:
- chunksize = int(len_itr / n_jobs) + 1
- else:
- chunksize = 100
- def init_worker(gn_toshare, gn_median_toshare):
- global G_gn, G_gn_median
- G_gn = gn_toshare
- G_gn_median = gn_median_toshare
- do_partial = partial(_compute_ged_median, params_ged)
- pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn, Gn_median))
- if verbose:
- iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
- desc='computing GEDs', file=sys.stdout)
- else:
- iterator = pool.imap_unordered(do_partial, itr, chunksize)
- for i, dis_sum, pi_forward in iterator:
- pi_forward_list[i] = pi_forward
- dis_list[i] = dis_sum
-# print('\n-------------------------------------------')
-# print(i, j, idx_itr, dis)
- pool.close()
- pool.join()
-
- else:
- dis_list = []
- pi_forward_list = []
- for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
- file=sys.stdout) if verbose else enumerate(Gn):
- dis_sum = 0
- pi_forward_list.append([])
- for G_p in Gn_median:
- dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
- **params_ged)
- pi_forward_list[idx].append(pi_tmp_forward)
- dis_sum += dis_tmp
- dis_list.append(dis_sum)
-
- return dis_list, pi_forward_list
-
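# Editor's note (illustration, not part of the deleted file): the argmin of
# dis_list returned by ged_median is the set median of Gn_median within Gn.
# A hypothetical call, assuming the gedlibpy bindings are built:
# dis_list, pi_forward_list = ged_median(Gn, Gn, parallel=False)
# set_median = Gn[int(np.argmin(dis_list))]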
-
-def _compute_ged_median(params_ged, itr):
-# print(itr)
- dis_sum = 0
- pi_forward = []
- for G_p in G_gn_median:
- dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_gn[itr], G_p,
- **params_ged)
- pi_forward.append(pi_tmp_forward)
- dis_sum += dis_tmp
-
- return itr, dis_sum, pi_forward
-
-
-def get_nb_edit_operations(g1, g2, forward_map, backward_map):
- """Compute the number of each type of edit operation.
- """
- n_vi = 0
- n_vr = 0
- n_vs = 0
- n_ei = 0
- n_er = 0
- n_es = 0
-
- nodes1 = [n for n in g1.nodes()]
- for i, map_i in enumerate(forward_map):
- if map_i == np.inf:
- n_vr += 1
- elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
- n_vs += 1
- for map_i in backward_map:
- if map_i == np.inf:
- n_vi += 1
-
-# idx_nodes1 = range(0, len(node1))
-
- edges1 = [e for e in g1.edges()]
- nb_edges2_cnted = 0
- for n1, n2 in edges1:
- idx1 = nodes1.index(n1)
- idx2 = nodes1.index(n2)
- # one of the nodes is removed, thus the edge is removed.
- if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
- n_er += 1
- # corresponding edge is in g2.
- elif (forward_map[idx1], forward_map[idx2]) in g2.edges():
- nb_edges2_cnted += 1
- # edge labels are different.
- if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \
- != g1.edges[(n1, n2)]['bond_type']:
- n_es += 1
- elif (forward_map[idx2], forward_map[idx1]) in g2.edges():
- nb_edges2_cnted += 1
- # edge labels are different.
- if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \
- != g1.edges[(n1, n2)]['bond_type']:
- n_es += 1
- # corresponding nodes are in g2, however the edge is removed.
- else:
- n_er += 1
- n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
-
- return n_vi, n_vr, n_vs, n_ei, n_er, n_es
-
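# Editor's worked example (illustration, not part of the deleted file): two
# tiny 'atom'/'bond_type'-labeled graphs with hand-made node maps. Node 0 is
# substituted (C -> N), node 1 is preserved, and g2's extra node and edge
# count as insertions.
import networkx as nx
import numpy as np
g1 = nx.Graph()
g1.add_node(0, atom='C'); g1.add_node(1, atom='O')
g1.add_edge(0, 1, bond_type='1')
g2 = nx.Graph()
g2.add_node(0, atom='N'); g2.add_node(1, atom='O'); g2.add_node(2, atom='C')
g2.add_edge(0, 1, bond_type='1'); g2.add_edge(1, 2, bond_type='1')
forward_map = [0, 1]            # g1 node i maps to g2 node forward_map[i]
backward_map = [0, 1, np.inf]   # g2 node 2 is unmapped, i.e. inserted
# (n_vi, n_vr, n_vs, n_ei, n_er, n_es)
assert get_nb_edit_operations(g1, g2, forward_map, backward_map) == (1, 0, 1, 1, 0, 0)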
-
-def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
- """Compute the number of each type of edit operation.
- """
- n_vi = 0
- n_vr = 0
- n_vs = 0
- sod_vs = 0
- n_ei = 0
- n_er = 0
-
- nodes1 = [n for n in g1.nodes()]
- for i, map_i in enumerate(forward_map):
- if map_i == np.inf:
- n_vr += 1
- else:
- n_vs += 1
- diff_x = float(g1.nodes[nodes1[i]]['x']) - float(g2.nodes[map_i]['x'])
- diff_y = float(g1.nodes[nodes1[i]]['y']) - float(g2.nodes[map_i]['y'])
- sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y))
- for map_i in backward_map:
- if map_i == np.inf:
- n_vi += 1
-
-# idx_nodes1 = range(0, len(node1))
-
- edges1 = [e for e in g1.edges()]
- nb_edges2_cnted = 0
- for n1, n2 in edges1:
- idx1 = nodes1.index(n1)
- idx2 = nodes1.index(n2)
- # one of the nodes is removed, thus the edge is removed.
- if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
- n_er += 1
- # corresponding edge is in g2. Edge label is not considered.
- elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \
- (forward_map[idx2], forward_map[idx1]) in g2.edges():
- nb_edges2_cnted += 1
- # corresponding nodes are in g2, however the edge is removed.
- else:
- n_er += 1
- n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
-
- return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er
-
-
-def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
- """Compute the number of each type of edit operation.
- """
- n_vi = 0
- n_vr = 0
- n_vs = 0
- sod_vs = 0
- n_ei = 0
- n_er = 0
- n_es = 0
- sod_es = 0
-
- nodes1 = [n for n in g1.nodes()]
- for i, map_i in enumerate(forward_map):
- if map_i == np.inf:
- n_vr += 1
- else:
- n_vs += 1
- sum_squares = 0
- for a_name in g1.graph['node_attrs']:
- diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
- sum_squares += np.square(diff)
- sod_vs += np.sqrt(sum_squares)
- for map_i in backward_map:
- if map_i == np.inf:
- n_vi += 1
-
-# idx_nodes1 = range(0, len(node1))
-
- edges1 = [e for e in g1.edges()]
- for n1, n2 in edges1:
- idx1 = nodes1.index(n1)
- idx2 = nodes1.index(n2)
- n1_g2 = forward_map[idx1]
- n2_g2 = forward_map[idx2]
- # one of the nodes is removed, thus the edge is removed.
- if n1_g2 == np.inf or n2_g2 == np.inf:
- n_er += 1
- # corresponding edge is in g2.
- elif (n1_g2, n2_g2) in g2.edges():
- n_es += 1
- sum_squares = 0
- for a_name in g1.graph['edge_attrs']:
- diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name])
- sum_squares += np.square(diff)
- sod_es += np.sqrt(sum_squares)
- elif (n2_g2, n1_g2) in g2.edges():
- n_es += 1
- sum_squares = 0
- for a_name in g1.graph['edge_attrs']:
- diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name])
- sum_squares += np.square(diff)
- sod_es += np.sqrt(sum_squares)
- # corresponding nodes are in g2, however the edge is removed.
- else:
- n_er += 1
- n_ei = nx.number_of_edges(g2) - n_es
-
- return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es
-
-
-if __name__ == '__main__':
- print('check test_ged.py')
\ No newline at end of file
diff --git a/gklearn/preimage/iam.py b/gklearn/preimage/iam.py
deleted file mode 100644
index f3e2165..0000000
--- a/gklearn/preimage/iam.py
+++ /dev/null
@@ -1,775 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Apr 26 11:49:12 2019
-
-Iterative alternate minimizations using GED.
-@author: ljia
-"""
-import numpy as np
-import random
-import networkx as nx
-from tqdm import tqdm
-import sys
-
-from gklearn.utils.graphdataset import get_dataset_attributes
-from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
-from gklearn.preimage.ged import GED, ged_median
-
-
-def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
- epsilon=0.001, node_label='atom', edge_label='bond_type',
- connected=False, removeNodes=True, allBestInit=False, allBestNodes=False,
- allBestEdges=False, allBestOutput=False,
- params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP',
- 'edit_cost_constant': [], 'stabilizer': None,
- 'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'}):
- """Compute a generalized median graph of Gn_median by iterative
- alternate minimization (IAM) based on GED, starting from Gn_candidate.
- """
-# Gn_median = Gn_median[0:10]
-# Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
- node_ir = np.inf # corresponds to node removal and insertion.
- label_r = 'thanksdanny' # the label marking node removal. # @todo: ensure this label cannot collide with real labels.
- ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
- attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
- edge_label=edge_label)
- node_label_set = get_node_labels(Gn_median, node_label)
- edge_label_set = get_edge_labels(Gn_median, edge_label)
-
-
- def generate_graph(G, pi_p_forward):
- G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
-# nx.draw_networkx(G)
-# import matplotlib.pyplot as plt
-# plt.show()
-# print(pi_p_forward)
-
- # update vertex labels.
- # pre-compute h_i0 for each label.
-# for label in get_node_labels(Gn, node_label):
-# print(label)
-# for nd in G.nodes(data=True):
-# pass
- if not ds_attrs['node_attr_dim']: # labels are symbolic
- for ndi, (nd, _) in enumerate(G.nodes(data=True)):
- h_i0_list = []
- label_list = []
- for label in node_label_set:
- h_i0 = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][ndi]
- if pi_i != node_ir and g.nodes[pi_i][node_label] == label:
- h_i0 += 1
- h_i0_list.append(h_i0)
- label_list.append(label)
- # case when the node is to be removed.
- if removeNodes:
- h_i0_remove = 0 # @todo: maybe this can be added to the node_label_set above.
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][ndi]
- if pi_i == node_ir:
- h_i0_remove += 1
- h_i0_list.append(h_i0_remove)
- label_list.append(label_r)
- # get the best labels.
- idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
- if allBestNodes: # choose all best graphs.
- nlabel_best = [label_list[idx] for idx in idx_max]
- # generate "best" graphs with regard to "best" node labels.
- G_new_list_nd = []
- for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
- for nl in nlabel_best:
- g_tmp = g.copy()
- if nl == label_r:
- g_tmp.remove_node(nd)
- else:
- g_tmp.nodes[nd][node_label] = nl
- G_new_list_nd.append(g_tmp)
- # nx.draw_networkx(g_tmp)
- # import matplotlib.pyplot as plt
- # plt.show()
- # print(g_tmp.nodes(data=True))
- # print(g_tmp.edges(data=True))
- G_new_list = [ggg.copy() for ggg in G_new_list_nd]
- else:
- # choose one of the best randomly.
- idx_rdm = random.randint(0, len(idx_max) - 1)
- best_label = label_list[idx_max[idx_rdm]]
- h_i0_max = h_i0_list[idx_max[idx_rdm]]
-
- g_new = G_new_list[0]
- if best_label == label_r:
- g_new.remove_node(nd)
- else:
- g_new.nodes[nd][node_label] = best_label
- G_new_list = [g_new]
- else: # labels are non-symbolic
- for ndi, (nd, _) in enumerate(G.nodes(data=True)):
- Si_norm = 0
- phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][ndi]
- if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
- Si_norm += 1
- phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
- phi_i_bar /= Si_norm
- G_new_list[0].nodes[nd]['attributes'] = phi_i_bar
-
-# for g in G_new_list:
-# import matplotlib.pyplot as plt
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- # update edge labels and adjacency matrix.
- if ds_attrs['edge_labeled']:
- G_new_list_edge = []
- for g_new in G_new_list:
- nd_list = [n for n in g_new.nodes()]
- g_tmp_list = [g_new.copy()]
- for nd1i in range(nx.number_of_nodes(g_new)):
- nd1 = nd_list[nd1i]# @todo: not just edges, but all pairs of nodes
- for nd2i in range(nd1i + 1, nx.number_of_nodes(g_new)):
- nd2 = nd_list[nd2i]
-# for nd1, nd2, _ in g_new.edges(data=True):
- h_ij0_list = []
- label_list = []
- for label in edge_label_set:
- h_ij0 = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][nd1i]
- pi_j = pi_p_forward[idx][nd2i]
- h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
- g.has_edge(pi_i, pi_j) and
- g.edges[pi_i, pi_j][edge_label] == label)
- h_ij0 += h_ij0_p
- h_ij0_list.append(h_ij0)
- label_list.append(label)
-
- # get the best labels.
- idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
- if allBestEdges: # choose all best graphs.
- elabel_best = [label_list[idx] for idx in idx_max]
- h_ij0_max = [h_ij0_list[idx] for idx in idx_max]
- # generate "best" graphs with regard to "best" node labels.
- G_new_list_ed = []
- for g_tmp in g_tmp_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
- for idxl, el in enumerate(elabel_best):
- g_tmp_copy = g_tmp.copy()
- # check whether a_ij is 0 or 1.
- sij_norm = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][nd1i]
- pi_j = pi_p_forward[idx][nd2i]
- if g.has_node(pi_i) and g.has_node(pi_j) and \
- g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if h_ij0_max[idxl] > len(Gn_median) * c_er / c_es + \
- sij_norm * (1 - (c_er + c_ei) / c_es):
- if not g_tmp_copy.has_edge(nd1, nd2):
- g_tmp_copy.add_edge(nd1, nd2)
- g_tmp_copy.edges[nd1, nd2][edge_label] = elabel_best[idxl]
- else:
- if g_tmp_copy.has_edge(nd1, nd2):
- g_tmp_copy.remove_edge(nd1, nd2)
- G_new_list_ed.append(g_tmp_copy)
- g_tmp_list = [ggg.copy() for ggg in G_new_list_ed]
- else: # choose one of the best randomly.
- idx_rdm = random.randint(0, len(idx_max) - 1)
- best_label = label_list[idx_max[idx_rdm]]
- h_ij0_max = h_ij0_list[idx_max[idx_rdm]]
-
- # check whether a_ij is 0 or 1.
- sij_norm = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][nd1i]
- pi_j = pi_p_forward[idx][nd2i]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
- if not g_new.has_edge(nd1, nd2):
- g_new.add_edge(nd1, nd2)
- g_new.edges[nd1, nd2][edge_label] = best_label
- else:
-# elif h_ij0_max < len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
- if g_new.has_edge(nd1, nd2):
- g_new.remove_edge(nd1, nd2)
- g_tmp_list = [g_new]
- G_new_list_edge += g_tmp_list
- G_new_list = [ggg.copy() for ggg in G_new_list_edge]
-
-
- else: # if edges are unlabeled
- # @todo: is this even right? G or g_tmp? check if the new one is right
- # @todo: works only for undirected graphs.
-
- for g_tmp in G_new_list:
- nd_list = [n for n in g_tmp.nodes()]
- for nd1i in range(nx.number_of_nodes(g_tmp)):
- nd1 = nd_list[nd1i]
- for nd2i in range(nd1i + 1, nx.number_of_nodes(g_tmp)):
- nd2 = nd_list[nd2i]
- sij_norm = 0
- for idx, g in enumerate(Gn_median):
- pi_i = pi_p_forward[idx][nd1i]
- pi_j = pi_p_forward[idx][nd2i]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
- # @todo: should we consider if nd1 and nd2 in g_tmp?
- # or just add the edge anyway?
- if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
- and not g_tmp.has_edge(nd1, nd2):
- g_tmp.add_edge(nd1, nd2)
- else: # @todo: which to use?
-# elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
- if g_tmp.has_edge(nd1, nd2):
- g_tmp.remove_edge(nd1, nd2)
- # do not change anything when equal.
-
-# for i, g in enumerate(G_new_list):
-# import matplotlib.pyplot as plt
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
-# # find the best graph generated in this iteration and update pi_p.
- # @todo: should we update all graphs generated or just the best ones?
- dis_list, pi_forward_list = ged_median(G_new_list, Gn_median,
- params_ged=params_ged)
- # @todo: should we remove the identical and connectivity check?
- # Don't know which is faster.
- if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
- G_new_list, idx_list = remove_duplicates(G_new_list)
- pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
- dis_list = [dis_list[idx] for idx in idx_list]
-# if connected == True:
-# G_new_list, idx_list = remove_disconnected(G_new_list)
-# pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-# idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
-# dis_min = dis_list[idx_min_tmp_list[0]]
-# pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
-# G_new_list = [G_new_list[idx] for idx in idx_min_list]
-
-# for g in G_new_list:
-# import matplotlib.pyplot as plt
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- return G_new_list, pi_forward_list, dis_list
-
-
- def best_median_graphs(Gn_candidate, pi_all_forward, dis_all):
- idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
- dis_min = dis_all[idx_min_list[0]]
- pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
- G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
- return G_min_list, pi_forward_min_list, dis_min
-
-
- def iteration_proc(G, pi_p_forward, cur_sod):
- G_list = [G]
- pi_forward_list = [pi_p_forward]
- old_sod = cur_sod * 2
- sod_list = [cur_sod]
- dis_list = [cur_sod]
- # iterations.
- itr = 0
- # @todo: what if difference == 0?
-# while itr < ite_max and (np.abs(old_sod - cur_sod) > epsilon or
-# np.abs(old_sod - cur_sod) == 0):
- while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon:
-# while itr < ite_max:
-# for itr in range(0, 5): # the convergence condition?
- print('itr_iam is', itr)
- G_new_list = []
- pi_forward_new_list = []
- dis_new_list = []
- for idx, g in enumerate(G_list):
-# label_set = get_node_labels(Gn_median + [g], node_label)
- G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph(
- g, pi_forward_list[idx])
- G_new_list += G_tmp_list
- pi_forward_new_list += pi_forward_tmp_list
- dis_new_list += dis_tmp_list
- # @todo: need to remove duplicates here?
- G_list = [ggg.copy() for ggg in G_new_list]
- pi_forward_list = [pitem.copy() for pitem in pi_forward_new_list]
- dis_list = dis_new_list[:]
-
- old_sod = cur_sod
- cur_sod = np.min(dis_list)
- sod_list.append(cur_sod)
-
- itr += 1
-
- # @todo: do we return all graphs or the best ones?
- # get the best ones of the generated graphs.
- G_list, pi_forward_list, dis_min = best_median_graphs(
- G_list, pi_forward_list, dis_list)
-
- if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
- G_list, idx_list = remove_duplicates(G_list)
- pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-# dis_list = [dis_list[idx] for idx in idx_list]
-
-# import matplotlib.pyplot as plt
-# for g in G_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- print('\nsods:', sod_list, '\n')
-
- return G_list, pi_forward_list, dis_min, sod_list
-
-
- def remove_duplicates(Gn):
- """Remove duplicate graphs from list.
- """
- Gn_new = []
- idx_list = []
- for idx, g in enumerate(Gn):
- dupl = False
- for g_new in Gn_new:
- if graph_isIdentical(g_new, g):
- dupl = True
- break
- if not dupl:
- Gn_new.append(g)
- idx_list.append(idx)
- return Gn_new, idx_list
-
-
- def remove_disconnected(Gn):
- """Remove disconnected graphs from list.
- """
- Gn_new = []
- idx_list = []
- for idx, g in enumerate(Gn):
- if nx.is_connected(g):
- Gn_new.append(g)
- idx_list.append(idx)
- return Gn_new, idx_list
-
-
- ###########################################################################
-
- # phase 1: initialize.
- # compute the set median.
- dis_min = np.inf
- dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median,
- params_ged=params_ged, parallel=True)
- print('finished computing GEDs.')
- # find all smallest distances.
- if allBestInit: # try all best init graphs.
- idx_min_list = range(len(dis_list))
- dis_min = dis_list
- else:
- idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
- dis_min = [dis_list[idx_min_list[0]]] * len(idx_min_list)
- idx_min_rdm = random.randint(0, len(idx_min_list) - 1)
- idx_min_list = [idx_min_list[idx_min_rdm]]
- sod_set_median = np.min(dis_min)
-
-
- # phase 2: iteration.
- G_list = []
- dis_list = []
- pi_forward_list = []
- G_set_median_list = []
-# sod_list = []
- for idx_tmp, idx_min in enumerate(idx_min_list):
-# print('idx_min is', idx_min)
- G = Gn_candidate[idx_min].copy()
- G_set_median_list.append(G.copy())
- # list of edit operations.
- pi_p_forward = pi_forward_all[idx_min]
-# pi_p_backward = pi_all_backward[idx_min]
- Gi_list, pi_i_forward_list, dis_i_min, sod_list = iteration_proc(G,
- pi_p_forward, dis_min[idx_tmp])
- G_list += Gi_list
- dis_list += [dis_i_min] * len(Gi_list)
- pi_forward_list += pi_i_forward_list
-
-
- if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
- G_list, idx_list = remove_duplicates(G_list)
- dis_list = [dis_list[idx] for idx in idx_list]
- pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
- if connected == True:
- G_list_con, idx_list = remove_disconnected(G_list)
- # if there are no connected graphs at all, keep the disconnected ones.
- if len(G_list_con) > 0: # @todo: is keeping the disconnected graphs the right fallback?
- G_list = G_list_con
- dis_list = [dis_list[idx] for idx in idx_list]
- pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-
-# import matplotlib.pyplot as plt
-# for g in G_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- # get the best median graphs
- G_gen_median_list, pi_forward_min_list, sod_gen_median = best_median_graphs(
- G_list, pi_forward_list, dis_list)
-# for g in G_gen_median_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- if not allBestOutput:
- # randomly choose one graph.
- idx_rdm = random.randint(0, len(G_gen_median_list) - 1)
- G_gen_median_list = [G_gen_median_list[idx_rdm]]
-
- return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median
-
-
-def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
- dataset='monoterpenoides',
- graph_dir=''):
- """Compute the IAM via the C++ implementation (GEDLIB) invoked through bash.
- """
- import os
- import time
-
- def createCollectionFile(Gn_names, y, filename):
- """Create collection file.
- """
- dirname_ds = os.path.dirname(filename)
- if dirname_ds != '':
- dirname_ds += '/'
- if not os.path.exists(dirname_ds) :
- os.makedirs(dirname_ds)
-
- with open(filename + '.xml', 'w') as fgroup:
- fgroup.write("<?xml version=\"1.0\"?>")
- fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">")
- fgroup.write("\n<GraphCollection>")
- for idx, fname in enumerate(Gn_names):
- fgroup.write("\n\t<graph file=\"" + fname + "\" class=\"" + str(y[idx]) + "\"/>")
- fgroup.write("\n</GraphCollection>")
- fgroup.close()
-
- tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
- fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, int(1e9)))
- createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection)
-# fn_collection = tmp_dir + 'collection_for_debug'
-# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl'
-
-# if dataset == 'Letter-high' or dataset == 'Fingerprint':
-# dataset = 'letter'
- command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n'
- command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
- command += 'export LD_LIBRARY_PATH\n'
- command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
- command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
- + ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' '
- if edit_cost_constant is None:
- command += 'None'
- else:
- for ec in edit_cost_constant:
- command += str(ec) + ' '
-# output = os.system(command)
- stream = os.popen(command)
-
- output = stream.readlines()
-# print(output)
- sod_sm = float(output[0].strip())
- sod_gm = float(output[1].strip())
-
- fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
- fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
-
- return sod_sm, sod_gm, fname_sm, fname_gm
-
-
-
-###############################################################################
-# Old implementations.
-
-def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
- connected=True):
- """Compute a median graph of Gn by iterative alternate minimization
- (an older IAM implementation).
- """
-# Gn = Gn[0:10]
- Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
-
- # phase 1: initialize.
- # compute the set median.
- dis_min = np.inf
- pi_p = []
- pi_all = []
- for idx1, G_p in enumerate(Gn):
- dist_sum = 0
- pi_all.append([])
- for idx2, G_p_prime in enumerate(Gn):
- dist_tmp, pi_tmp, _ = GED(G_p, G_p_prime)
- pi_all[idx1].append(pi_tmp)
- dist_sum += dist_tmp
- if dist_sum < dis_min:
- dis_min = dist_sum
- G = G_p.copy()
- idx_min = idx1
- # list of edit operations.
- pi_p = pi_all[idx_min]
-
- # phase 2: iteration.
- ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'],
- edge_label=edge_label)
- for itr in range(0, 10): # @todo: the convergence condition?
- G_new = G.copy()
- # update vertex labels.
- # pre-compute h_i0 for each label.
-# for label in get_node_labels(Gn, node_label):
-# print(label)
-# for nd in G.nodes(data=True):
-# pass
- if not ds_attrs['node_attr_dim']: # labels are symbolic
- for nd, _ in G.nodes(data=True):
- h_i0_list = []
- label_list = []
- for label in get_node_labels(Gn, node_label):
- h_i0 = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd]
- if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
- h_i0 += 1
- h_i0_list.append(h_i0)
- label_list.append(label)
- # choose one of the best randomly.
- idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
- idx_rdm = random.randint(0, len(idx_max) - 1)
- G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
- else: # labels are non-symbolic
- for nd, _ in G.nodes(data=True):
- Si_norm = 0
- phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd]
- if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
- Si_norm += 1
- phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
- phi_i_bar /= Si_norm
- G_new.nodes[nd]['attributes'] = phi_i_bar
-
- # update edge labels and adjacency matrix.
- if ds_attrs['edge_labeled']:
- for nd1, nd2, _ in G.edges(data=True):
- h_ij0_list = []
- label_list = []
- for label in get_edge_labels(Gn, edge_label):
- h_ij0 = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd1]
- pi_j = pi_p[idx][nd2]
- h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
- g.has_edge(pi_i, pi_j) and
- g.edges[pi_i, pi_j][edge_label] == label)
- h_ij0 += h_ij0_p
- h_ij0_list.append(h_ij0)
- label_list.append(label)
- # choose one of the best randomly.
- idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
- h_ij0_max = h_ij0_list[idx_max[0]]
- idx_rdm = random.randint(0, len(idx_max) - 1)
- best_label = label_list[idx_max[idx_rdm]]
-
- # check whether a_ij is 0 or 1.
- sij_norm = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd1]
- pi_j = pi_p[idx][nd2]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
- if not G_new.has_edge(nd1, nd2):
- G_new.add_edge(nd1, nd2)
- G_new.edges[nd1, nd2][edge_label] = best_label
- else:
- if G_new.has_edge(nd1, nd2):
- G_new.remove_edge(nd1, nd2)
- else: # if edges are unlabeled
- for nd1, nd2, _ in G.edges(data=True):
- sij_norm = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p[idx][nd1]
- pi_j = pi_p[idx][nd2]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if sij_norm > len(Gn) * c_er / (c_er + c_ei):
- if not G_new.has_edge(nd1, nd2):
- G_new.add_edge(nd1, nd2)
- else:
- if G_new.has_edge(nd1, nd2):
- G_new.remove_edge(nd1, nd2)
-
- G = G_new.copy()
-
- # update pi_p
- pi_p = []
- for idx1, G_p in enumerate(Gn):
- dist_tmp, pi_tmp, _ = GED(G, G_p)
- pi_p.append(pi_tmp)
-
- return G
-
-# --------------------------- These are tests --------------------------------#
-
-def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1,
- node_label='atom', edge_label='bond_type'):
-	"""Compute a median graph of Gn by IAM, initialized from the graphs in G_candidate.
-	"""
-# Gn = Gn[0:10]
- Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
-
-	# phase 1: initialize.
- # compute set-median.
- dis_min = np.inf
-# pi_p = []
- pi_all_forward = []
- pi_all_backward = []
- for idx1, G_p in tqdm(enumerate(G_candidate), desc='computing GEDs', file=sys.stdout):
- dist_sum = 0
- pi_all_forward.append([])
- pi_all_backward.append([])
- for idx2, G_p_prime in enumerate(Gn):
- dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_p, G_p_prime)
- pi_all_forward[idx1].append(pi_tmp_forward)
- pi_all_backward[idx1].append(pi_tmp_backward)
- dist_sum += dist_tmp
- if dist_sum <= dis_min:
- dis_min = dist_sum
- G = G_p.copy()
- idx_min = idx1
- # list of edit operations.
- pi_p_forward = pi_all_forward[idx_min]
- pi_p_backward = pi_all_backward[idx_min]
-
- # phase 2: iteration.
- ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
- edge_label=edge_label)
- label_set = get_node_labels(Gn + [G], node_label)
- for itr in range(0, 10): # @todo: the convergence condition?
- G_new = G.copy()
- # update vertex labels.
- # pre-compute h_i0 for each label.
-# for label in get_node_labels(Gn, node_label):
-# print(label)
-# for nd in G.nodes(data=True):
-# pass
- if not ds_attrs['node_attr_dim']: # labels are symbolic
- for nd in G.nodes():
- h_i0_list = []
- label_list = []
- for label in label_set:
- h_i0 = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd]
- if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
- h_i0 += 1
- h_i0_list.append(h_i0)
- label_list.append(label)
- # choose one of the best randomly.
- idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
- idx_rdm = random.randint(0, len(idx_max) - 1)
- G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
- else: # labels are non-symbolic
- for nd in G.nodes():
- Si_norm = 0
- phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd]
-					if g.has_node(pi_i): # @todo: what if no g has the node? phi_i_bar = 0?
-						Si_norm += 1
-						phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
-				if Si_norm > 0: # guard against division by zero when no graph maps this node.
-					phi_i_bar /= Si_norm
-				G_new.nodes[nd]['attributes'] = phi_i_bar
-
- # update edge labels and adjacency matrix.
- if ds_attrs['edge_labeled']:
- for nd1, nd2, _ in G.edges(data=True):
- h_ij0_list = []
- label_list = []
- for label in get_edge_labels(Gn, edge_label):
- h_ij0 = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd1]
- pi_j = pi_p_forward[idx][nd2]
- h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
- g.has_edge(pi_i, pi_j) and
- g.edges[pi_i, pi_j][edge_label] == label)
- h_ij0 += h_ij0_p
- h_ij0_list.append(h_ij0)
- label_list.append(label)
- # choose one of the best randomly.
- idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
- h_ij0_max = h_ij0_list[idx_max[0]]
- idx_rdm = random.randint(0, len(idx_max) - 1)
- best_label = label_list[idx_max[idx_rdm]]
-
- # check whether a_ij is 0 or 1.
- sij_norm = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd1]
- pi_j = pi_p_forward[idx][nd2]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
- if not G_new.has_edge(nd1, nd2):
- G_new.add_edge(nd1, nd2)
- G_new.edges[nd1, nd2][edge_label] = best_label
- else:
- if G_new.has_edge(nd1, nd2):
- G_new.remove_edge(nd1, nd2)
- else: # if edges are unlabeled
- # @todo: works only for undirected graphs.
- for nd1 in range(nx.number_of_nodes(G)):
- for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
- sij_norm = 0
- for idx, g in enumerate(Gn):
- pi_i = pi_p_forward[idx][nd1]
- pi_j = pi_p_forward[idx][nd2]
- if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
- sij_norm += 1
- if sij_norm > len(Gn) * c_er / (c_er + c_ei):
- if not G_new.has_edge(nd1, nd2):
- G_new.add_edge(nd1, nd2)
- elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
- if G_new.has_edge(nd1, nd2):
- G_new.remove_edge(nd1, nd2)
- # do not change anything when equal.
-
- G = G_new.copy()
-
- # update pi_p
- pi_p_forward = []
- for G_p in Gn:
- dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
- pi_p_forward.append(pi_tmp_forward)
-
- return G
-
-
-###############################################################################
-
-if __name__ == '__main__':
- from gklearn.utils.graphfiles import loadDataset
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
- 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
-# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-# 'extra_params': {}} # node nsymb
-# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-# 'extra_params': {}}
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-
- iam(Gn)
\ No newline at end of file
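
The edge-update test in `iam()` and `test_iam_with_more_graphs_as_init()` above is easy to misread, so here is a minimal, self-contained sketch of that rule with made-up counts; `keep_edge` is a hypothetical helper, not part of the deleted module:

```python
# Sketch of the IAM edge rule: keep edge (i, j) with its majority label iff
# h_ij0_max > N*c_er/c_es + s_ij*(1 - (c_er + c_ei)/c_es), where h_ij0_max
# counts graphs whose mapped edge carries the majority label and s_ij counts
# graphs that have the mapped edge at all.

def keep_edge(h_ij0_max, sij_norm, n_graphs, c_ei=3, c_er=3, c_es=1):
    threshold = n_graphs * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es)
    return h_ij0_max > threshold

# With 10 graphs and the default costs, an edge whose label agrees in 8
# graphs clears the threshold (30 + 8 * -5 = -10), while one present in
# only 2 graphs does not (30 + 2 * -5 = 20).
print(keep_edge(8, 8, 10))  # True
print(keep_edge(2, 2, 10))  # False
```
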
diff --git a/gklearn/preimage/knn.py b/gklearn/preimage/knn.py
deleted file mode 100644
index c179287..0000000
--- a/gklearn/preimage/knn.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jan 10 13:22:04 2020
-
-@author: ljia
-"""
-import numpy as np
-#import matplotlib.pyplot as plt
-from tqdm import tqdm
-import random
-#import csv
-from shutil import copyfile
-import os
-import sys # needed for sys.stdout in knn().
-
-from gklearn.preimage.iam import iam_bash
-from gklearn.utils.graphfiles import loadDataset, loadGXL
-from gklearn.preimage.ged import GED
-from gklearn.preimage.utils import get_same_item_indices
-
-def test_knn():
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
-# gkernel = 'treeletkernel'
-# node_label = 'atom'
-# edge_label = 'bond_type'
-# ds_name = 'mono'
- dir_output = 'results/knn/'
-	graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'
-
- k_nn = 1
- percent = 0.1
- repeats = 50
- edit_cost_constant = [3, 3, 1, 3, 3, 1]
-
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
- for repeat in range(0, repeats):
- print('\n---------------------------------')
- print('repeat =', repeat)
- accuracy_sm_list = []
- accuracy_gm_list = []
- sod_sm_list = []
- sod_gm_list = []
-
- random.seed(repeat)
- set_median_list = []
- gen_median_list = []
- train_y_set = []
- for y, values in y_idx.items():
- print('\ny =', y)
- size_median_set = int(len(values) * percent)
- median_set_idx = random.sample(values, size_median_set)
- print('median set: ', median_set_idx)
-
- # compute set median and gen median using IAM (C++ through bash).
- # Gn_median = [Gn[idx] for idx in median_set_idx]
- group_fnames = [Gn[g].graph['filename'] for g in median_set_idx]
- sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant,
- graph_dir=graph_dir)
- print('sod_sm, sod_gm:', sod_sm, sod_gm)
- sod_sm_list.append(sod_sm)
- sod_gm_list.append(sod_gm)
- fname_sm_new = dir_output + 'medians/set_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
- copyfile(fname_sm, fname_sm_new)
- fname_gm_new = dir_output + 'medians/gen_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
- copyfile(fname_gm, fname_gm_new)
- set_median_list.append(loadGXL(fname_sm_new))
- gen_median_list.append(loadGXL(fname_gm_new))
- train_y_set.append(int(y))
-
- print(sod_sm, sod_gm)
-
- # do 1-nn.
- test_y_set = [int(y) for y in y_all]
-		accuracy_sm = knn(set_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
-		accuracy_gm = knn(gen_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
- accuracy_sm_list.append(accuracy_sm)
- accuracy_gm_list.append(accuracy_gm)
- print('current accuracy sm and gm:', accuracy_sm, accuracy_gm)
-
- # output
- accuracy_sm_mean = np.mean(accuracy_sm_list)
- accuracy_gm_mean = np.mean(accuracy_gm_list)
- print('\ntotal average accuracy sm and gm:', accuracy_sm_mean, accuracy_gm_mean)
-
-
-def knn(train_set, train_y_set, test_set, test_y_set, k=1, distance='ged'):
-	if k != 1 or distance != 'ged':
-		raise NotImplementedError('only 1-nn with the GED distance is implemented.')
-	algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-	params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
-				  'algo_options': algo_options, 'stabilizer': None}
- accuracy = 0
- for idx_test, g_test in tqdm(enumerate(test_set), desc='computing 1-nn',
- file=sys.stdout):
- dis = np.inf
- for idx_train, g_train in enumerate(train_set):
- dis_cur, _, _ = GED(g_test, g_train, **params_ged)
- if dis_cur < dis:
- dis = dis_cur
- test_y_cur = train_y_set[idx_train]
- if test_y_cur == test_y_set[idx_test]:
- accuracy += 1
- accuracy = accuracy / len(test_set)
-
- return accuracy
-
-
-
-if __name__ == '__main__':
- test_knn()
\ No newline at end of file
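
For reference, the 1-nn loop in the deleted `knn()` above is distance-agnostic apart from the GED call; below is a minimal sketch of the same logic with a toy numeric distance, all names hypothetical:

```python
# Distance-agnostic version of the 1-nn loop above; `dist` is any callable
# returning a dissimilarity (GED in the deleted code).

def one_nn_accuracy(train_set, train_y, test_set, test_y, dist):
    correct = 0
    for item, y_true in zip(test_set, test_y):
        # predict the label of the nearest training item under dist.
        y_pred = min(zip(train_set, train_y), key=lambda p: dist(item, p[0]))[1]
        correct += int(y_pred == y_true)
    return correct / len(test_set)

# Toy usage with |x - y| as the "distance".
print(one_nn_accuracy([1, 10], ['a', 'b'], [2, 9], ['a', 'b'],
                      dist=lambda x, y: abs(x - y)))  # 1.0
```
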
diff --git a/gklearn/preimage/libs.py b/gklearn/preimage/libs.py
deleted file mode 100644
index 76005c6..0000000
--- a/gklearn/preimage/libs.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import sys
-import pathlib
-
-# insert gedlibpy library.
-sys.path.insert(0, "../../../")
-from gedlibpy import librariesImport, gedlibpy
diff --git a/gklearn/preimage/median.py b/gklearn/preimage/median.py
deleted file mode 100644
index 1c5bb0f..0000000
--- a/gklearn/preimage/median.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import sys
-sys.path.insert(0, "../")
-#import pathlib
-import numpy as np
-import networkx as nx
-import time
-
-from gedlibpy import librariesImport, gedlibpy
-#import script
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-from gklearn.utils.graphfiles import loadDataset
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
-	"""
-	Replace a graph in the environment handled by script.
-
-	If old_id is -1, add a new graph to the environment instead.
-	"""
- if(old_id > -1):
- script.PyClearGraph(old_id)
- new_id = script.PyAddGraph(label)
- for i in graph.nodes():
-		script.PyAddNode(new_id, str(i), graph.node[i]) # !! strings are required by gedlib
- for e in graph.edges:
- script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- return new_id
-
-# Draw the current median.
-def draw_Letter_graph(graph, savepath=''):
- import numpy as np
- import networkx as nx
- import matplotlib.pyplot as plt
- plt.figure()
- pos = {}
- for n in graph.nodes:
- pos[n] = np.array([float(graph.node[n]['attributes'][0]),
- float(graph.node[n]['attributes'][1])])
- nx.draw_networkx(graph, pos)
- if savepath != '':
- plt.savefig(savepath + str(time.time()) + '.eps', format='eps', dpi=300)
- plt.show()
- plt.clf()
-
-#compute new mappings
-def update_mappings(script,median_id,listID):
- med_distances = {}
- med_mappings = {}
- sod = 0
- for i in range(0,len(listID)):
- script.PyRunMethod(median_id,listID[i])
- med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
- med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
- sod += med_distances[i]
- return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
- s_ij = 0
- for k in range(0,len(all_mappings)):
- cur_graph = all_graphs[k]
- cur_mapping = all_mappings[k]
- size_graph = cur_graph.order()
- if ((cur_mapping[i] < size_graph) and
- (cur_mapping[j] < size_graph) and
- (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
- s_ij += 1
-
- return s_ij
-
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-# from scipy.stats.mstats import gmean
-
-# for i in median.nodes():
-# for k in listIdSet:
-# vectors = [] #np.zeros((len(listIdSet),2))
-# if(k != median_id):
-# phi_i = mappings[k][i]
-# if(phi_i < dataset[k].order()):
-# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-# new_labels = gmean(vectors)
-# median.node[i]['x'] = str(new_labels[0])
-# median.node[i]['y'] = str(new_labels[1])
-# return median
-
-def update_median_nodes(median,dataset,mappings):
- #update node attributes
- for i in median.nodes():
- nb_sub=0
- mean_label = {'x' : 0, 'y' : 0}
- for k in range(0,len(mappings)):
- phi_i = mappings[k][i]
- if ( phi_i < dataset[k].order() ):
- nb_sub += 1
- mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
- mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
- median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
- median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
- return median
-
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-#for letter high, ceir = 1.7, alpha = 0.75
- size_dataset = len(dataset)
- ratio_cei_cer = cer/(cei + cer)
- threshold = size_dataset*ratio_cei_cer
- order_graph_median = median.order()
- for i in range(0,order_graph_median):
- for j in range(i+1,order_graph_median):
- s_ij = calcul_Sij(mappings,dataset,i,j)
- if(s_ij > threshold):
- median.add_edge(i,j)
- else:
- if(median.has_edge(i,j)):
- median.remove_edge(i,j)
- return median
-
-
-
-def compute_median(script, listID, dataset, verbose=False):
-	"""Compute a graph median of a dataset according to an environment.
-
-	Parameters
-	----------
-	script : a gedlib-initialized environment.
-	listID (list): a list of graph IDs in script; encodes the dataset.
-	dataset (list): the corresponding graphs in NetworkX format. We assume
-		that the graph with ID listID[i] corresponds to dataset[i].
-
-	Returns
-	-------
-	The median graph, its SOD, the SODs of all iterations, and the set median.
-	"""
- print(len(listID))
- median_set_index, median_set_sod = compute_median_set(script, listID)
- print(median_set_index)
- print(median_set_sod)
- sods = []
-	# add the median to the environment.
- set_median = dataset[median_set_index].copy()
- median = dataset[median_set_index].copy()
- cur_med_id = replace_graph_in_env(script,median,-1)
- med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
-	ite_max = 50
-	old_sod = cur_sod * 2
-	ite = 0
-	epsilon = 0.001
-
-	while (ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon):
-		old_sod = cur_sod # remember the previous SOD for the convergence test.
-		median = update_median_nodes(median, dataset, med_mappings)
-		median = update_median_edges(dataset, med_mappings, median)
-
-		cur_med_id = replace_graph_in_env(script, median, cur_med_id)
-		med_distances, med_mappings, cur_sod = update_mappings(script, cur_med_id, listID)
-
-		sods.append(cur_sod)
-		if verbose:
-			print(cur_sod)
-		ite += 1
-	return median, cur_sod, sods, set_median
-
-
-def compute_median_set(script, listID):
-	"""Return the index in listID of the set median, together with its SOD."""
-	# compute the median set.
- N=len(listID)
- map_id_to_index = {}
- map_index_to_id = {}
- for i in range(0,len(listID)):
- map_id_to_index[listID[i]] = i
- map_index_to_id[i] = listID[i]
-
- distances = np.zeros((N,N))
- for i in listID:
- for j in listID:
- script.PyRunMethod(i,j)
- distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
- median_set_index = np.argmin(np.sum(distances,0))
- sod = np.min(np.sum(distances,0))
-
- return median_set_index, sod
-
-if __name__ == "__main__":
-	# load the dataset.
-	gedlibpy.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
-	gedlibpy.PySetEditCost("LETTER")
-	gedlibpy.PyInitEnv()
-	gedlibpy.PySetMethod("IPFP", "")
-	gedlibpy.PyInitMethod()
-
-	dataset, my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
-	listID = gedlibpy.PyGetAllGraphIds()
-	median, sod, sods, set_median = compute_median(gedlibpy, listID, dataset, verbose=True)
-
- print(sod)
- draw_Letter_graph(median)
-
-
-#if __name__ == '__main__':
-# # test draw_Letter_graph
-# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-# 'extra_params': {}} # node nsymb
-# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# print(y_all)
-# for g in Gn:
-# draw_Letter_graph(g)
\ No newline at end of file
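
The edge rule in `update_median_edges()` above reduces to a majority vote when c_ei equals c_er; here is a small sketch of that decision with made-up counts, using a hypothetical helper name:

```python
# Edge (i, j) is kept in the median iff s_ij > N * c_er / (c_ei + c_er),
# where s_ij (from calcul_Sij) counts dataset graphs whose mapped nodes are
# adjacent. With c_ei == c_er the threshold is N / 2: a simple majority.

def median_has_edge(s_ij, n_graphs, cei=0.425, cer=0.425):
    return s_ij > n_graphs * cer / (cei + cer)

print(median_has_edge(6, 10))  # True: 6 of 10 graphs have the edge
print(median_has_edge(5, 10))  # False: a tie removes the edge
```
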
diff --git a/gklearn/preimage/median_benoit.py b/gklearn/preimage/median_benoit.py
deleted file mode 100644
index 6712196..0000000
--- a/gklearn/preimage/median_benoit.py
+++ /dev/null
@@ -1,201 +0,0 @@
-import sys
-import pathlib
-import numpy as np
-import networkx as nx
-
-import librariesImport
-import script
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
-	"""
-	Replace a graph in the environment handled by script.
-
-	If old_id is -1, add a new graph to the environment instead.
-	"""
- if(old_id > -1):
- script.PyClearGraph(old_id)
- new_id = script.PyAddGraph(label)
- for i in graph.nodes():
-		script.PyAddNode(new_id, str(i), graph.node[i]) # !! strings are required by gedlib
- for e in graph.edges:
- script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- return new_id
-
-# Draw the current median.
-def draw_Letter_graph(graph):
- import numpy as np
- import networkx as nx
- import matplotlib.pyplot as plt
- plt.figure()
- pos = {}
- for n in graph.nodes:
- pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
- nx.draw_networkx(graph,pos)
- plt.show()
-
-#compute new mappings
-def update_mappings(script,median_id,listID):
- med_distances = {}
- med_mappings = {}
- sod = 0
- for i in range(0,len(listID)):
- script.PyRunMethod(median_id,listID[i])
- med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
- med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
- sod += med_distances[i]
- return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
- s_ij = 0
- for k in range(0,len(all_mappings)):
- cur_graph = all_graphs[k]
- cur_mapping = all_mappings[k]
- size_graph = cur_graph.order()
- if ((cur_mapping[i] < size_graph) and
- (cur_mapping[j] < size_graph) and
- (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
- s_ij += 1
-
- return s_ij
-
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-# from scipy.stats.mstats import gmean
-
-# for i in median.nodes():
-# for k in listIdSet:
-# vectors = [] #np.zeros((len(listIdSet),2))
-# if(k != median_id):
-# phi_i = mappings[k][i]
-# if(phi_i < dataset[k].order()):
-# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-# new_labels = gmean(vectors)
-# median.node[i]['x'] = str(new_labels[0])
-# median.node[i]['y'] = str(new_labels[1])
-# return median
-
-def update_median_nodes(median,dataset,mappings):
- #update node attributes
- for i in median.nodes():
- nb_sub=0
- mean_label = {'x' : 0, 'y' : 0}
- for k in range(0,len(mappings)):
- phi_i = mappings[k][i]
- if ( phi_i < dataset[k].order() ):
- nb_sub += 1
- mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
- mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
- median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
- median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
- return median
-
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-#for letter high, ceir = 1.7, alpha = 0.75
- size_dataset = len(dataset)
- ratio_cei_cer = cer/(cei + cer)
- threshold = size_dataset*ratio_cei_cer
- order_graph_median = median.order()
- for i in range(0,order_graph_median):
- for j in range(i+1,order_graph_median):
- s_ij = calcul_Sij(mappings,dataset,i,j)
- if(s_ij > threshold):
- median.add_edge(i,j)
- else:
- if(median.has_edge(i,j)):
- median.remove_edge(i,j)
- return median
-
-
-
-def compute_median(script, listID, dataset, verbose=False):
-	"""Compute a graph median of a dataset according to an environment.
-
-	Parameters
-	----------
-	script : a gedlib-initialized environment.
-	listID (list): a list of graph IDs in script; encodes the dataset.
-	dataset (list): the corresponding graphs in NetworkX format. We assume
-		that the graph with ID listID[i] corresponds to dataset[i].
-
-	Returns
-	-------
-	The median graph, its SOD, the SODs of all iterations, and the set median.
-	"""
- print(len(listID))
- median_set_index, median_set_sod = compute_median_set(script, listID)
- print(median_set_index)
- print(median_set_sod)
- sods = []
-	# add the median to the environment.
- set_median = dataset[median_set_index].copy()
- median = dataset[median_set_index].copy()
- cur_med_id = replace_graph_in_env(script,median,-1)
- med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
-	ite_max = 50
-	old_sod = cur_sod * 2
-	ite = 0
-	epsilon = 0.001
-
-	while (ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon):
-		old_sod = cur_sod # remember the previous SOD for the convergence test.
-		median = update_median_nodes(median, dataset, med_mappings)
-		median = update_median_edges(dataset, med_mappings, median)
-
-		cur_med_id = replace_graph_in_env(script, median, cur_med_id)
-		med_distances, med_mappings, cur_sod = update_mappings(script, cur_med_id, listID)
-
-		sods.append(cur_sod)
-		if verbose:
-			print(cur_sod)
-		ite += 1
-	return median, cur_sod, sods, set_median
-
-
-def compute_median_set(script, listID):
-	"""Return the index in listID of the set median, together with its SOD."""
-	# compute the median set.
- N=len(listID)
- map_id_to_index = {}
- map_index_to_id = {}
- for i in range(0,len(listID)):
- map_id_to_index[listID[i]] = i
- map_index_to_id[i] = listID[i]
-
- distances = np.zeros((N,N))
- for i in listID:
- for j in listID:
- script.PyRunMethod(i,j)
- distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
- median_set_index = np.argmin(np.sum(distances,0))
- sod = np.min(np.sum(distances,0))
-
- return median_set_index, sod
-
-if __name__ == "__main__":
-	# load the dataset.
- script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
- script.PySetEditCost("LETTER")
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
- listID = script.PyGetAllGraphIds()
-	median, sod, sods, set_median = compute_median(script, listID, dataset, verbose=True)
-
- print(sod)
- draw_Letter_graph(median)
diff --git a/gklearn/preimage/median_linlin.py b/gklearn/preimage/median_linlin.py
deleted file mode 100644
index 6139558..0000000
--- a/gklearn/preimage/median_linlin.py
+++ /dev/null
@@ -1,215 +0,0 @@
-import sys
-import pathlib
-import numpy as np
-import networkx as nx
-
-from gedlibpy import librariesImport, gedlibpy
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
-	"""
-	Replace a graph in the environment handled by script.
-
-	If old_id is -1, add a new graph to the environment instead.
-	"""
- if(old_id > -1):
- script.PyClearGraph(old_id)
- new_id = script.PyAddGraph(label)
- for i in graph.nodes():
-		script.PyAddNode(new_id, str(i), graph.node[i]) # !! strings are required by gedlib
- for e in graph.edges:
- script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
- script.PyInitEnv()
- script.PySetMethod("IPFP", "")
- script.PyInitMethod()
-
- return new_id
-
-# Draw the current median.
-def draw_Letter_graph(graph):
- import numpy as np
- import networkx as nx
- import matplotlib.pyplot as plt
- plt.figure()
- pos = {}
- for n in graph.nodes:
- pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
- nx.draw_networkx(graph,pos)
- plt.show()
-
-#compute new mappings
-def update_mappings(script,median_id,listID):
- med_distances = {}
- med_mappings = {}
- sod = 0
- for i in range(0,len(listID)):
- script.PyRunMethod(median_id,listID[i])
- med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
- med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
- sod += med_distances[i]
- return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
- s_ij = 0
- for k in range(0,len(all_mappings)):
- cur_graph = all_graphs[k]
- cur_mapping = all_mappings[k]
- size_graph = cur_graph.order()
- if ((cur_mapping[i] < size_graph) and
- (cur_mapping[j] < size_graph) and
- (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
- s_ij += 1
-
- return s_ij
-
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-# from scipy.stats.mstats import gmean
-
-# for i in median.nodes():
-# for k in listIdSet:
-# vectors = [] #np.zeros((len(listIdSet),2))
-# if(k != median_id):
-# phi_i = mappings[k][i]
-# if(phi_i < dataset[k].order()):
-# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-# new_labels = gmean(vectors)
-# median.node[i]['x'] = str(new_labels[0])
-# median.node[i]['y'] = str(new_labels[1])
-# return median
-
-def update_median_nodes(median,dataset,mappings):
- #update node attributes
- for i in median.nodes():
- nb_sub=0
- mean_label = {'x' : 0, 'y' : 0}
- for k in range(0,len(mappings)):
- phi_i = mappings[k][i]
- if ( phi_i < dataset[k].order() ):
- nb_sub += 1
- mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
- mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
- median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
- median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
- return median
-
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-#for letter high, ceir = 1.7, alpha = 0.75
- size_dataset = len(dataset)
- ratio_cei_cer = cer/(cei + cer)
- threshold = size_dataset*ratio_cei_cer
- order_graph_median = median.order()
- for i in range(0,order_graph_median):
- for j in range(i+1,order_graph_median):
- s_ij = calcul_Sij(mappings,dataset,i,j)
- if(s_ij > threshold):
- median.add_edge(i,j)
- else:
- if(median.has_edge(i,j)):
- median.remove_edge(i,j)
- return median
-
-
-
-def compute_median(script, listID, dataset, verbose=False):
-	"""Compute a graph median of a dataset according to an environment.
-
-	Parameters
-	----------
-	script : a gedlib-initialized environment.
-	listID (list): a list of graph IDs in script; encodes the dataset.
-	dataset (list): the corresponding graphs in NetworkX format. We assume
-		that the graph with ID listID[i] corresponds to dataset[i].
-
-	Returns
-	-------
-	The median graph, its SOD, the SODs of all iterations, and the set median.
-	"""
- print(len(listID))
- median_set_index, median_set_sod = compute_median_set(script, listID)
- print(median_set_index)
- print(median_set_sod)
- sods = []
-	# add the median to the environment.
- set_median = dataset[median_set_index].copy()
- median = dataset[median_set_index].copy()
- cur_med_id = replace_graph_in_env(script,median,-1)
- med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
- sods.append(cur_sod)
- if(verbose):
- print(cur_sod)
-	ite_max = 50
-	old_sod = cur_sod * 2
-	ite = 0
-	epsilon = 0.001
-
-	while (ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon):
-		old_sod = cur_sod # remember the previous SOD for the convergence test.
-		median = update_median_nodes(median, dataset, med_mappings)
-		median = update_median_edges(dataset, med_mappings, median)
-
-		cur_med_id = replace_graph_in_env(script, median, cur_med_id)
-		med_distances, med_mappings, cur_sod = update_mappings(script, cur_med_id, listID)
-
-		sods.append(cur_sod)
-		if verbose:
-			print(cur_sod)
-		ite += 1
-	return median, cur_sod, sods, set_median
-
-
-def compute_median_set(script, listID):
-	"""Return the index in listID of the set median, together with its SOD."""
-	# compute the median set.
- N=len(listID)
- map_id_to_index = {}
- map_index_to_id = {}
- for i in range(0,len(listID)):
- map_id_to_index[listID[i]] = i
- map_index_to_id[i] = listID[i]
-
- distances = np.zeros((N,N))
- for i in listID:
- for j in listID:
- script.PyRunMethod(i,j)
- distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
- median_set_index = np.argmin(np.sum(distances,0))
- sod = np.min(np.sum(distances,0))
-
- return median_set_index, sod
-
-def _convertGraph(G):
- """Convert a graph to the proper NetworkX format that can be
- recognized by library gedlibpy.
- """
- G_new = nx.Graph()
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), chem=attrs['atom'])
-# G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
-# y=str(attrs['attributes'][1]))
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-# G_new.add_edge(str(nd1), str(nd2))
-
- return G_new
-
-if __name__ == "__main__":
-	# load the dataset.
- gedlibpy.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
- gedlibpy.PySetEditCost("LETTER")
- gedlibpy.PyInitEnv()
- gedlibpy.PySetMethod("IPFP", "")
- gedlibpy.PyInitMethod()
-
- dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
- listID = gedlibpy.PyGetAllGraphIds()
-	median, sod, sods, set_median = compute_median(gedlibpy, listID, dataset, verbose=True)
-
- print(sod)
- draw_Letter_graph(median)
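
A toy illustration of what `_convertGraph()` above produces, assuming it is run in the deleted module's context; according to the code, 'chem' and 'valence' are the attribute names gedlibpy expects:

```python
import networkx as nx

g = nx.Graph()
g.add_node(0, atom='C')
g.add_node(1, atom='O')
g.add_edge(0, 1, bond_type=2)

g_new = _convertGraph(g)  # node ids become strings; labels are renamed.
print(list(g_new.nodes(data=True)))  # [('0', {'chem': 'C'}), ('1', {'chem': 'O'})]
print(list(g_new.edges(data=True)))  # [('0', '1', {'valence': 2})]
```
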
diff --git a/gklearn/preimage/pathfrequency.py b/gklearn/preimage/pathfrequency.py
deleted file mode 100644
index 3bca1bc..0000000
--- a/gklearn/preimage/pathfrequency.py
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Mar 20 10:12:15 2019
-
-inferring a graph from path frequency.
-@author: ljia
-"""
-#import numpy as np
-import networkx as nx
-from scipy.spatial.distance import hamming
-import itertools
-
-def SISF(K, v):
-	# @todo: not implemented; `output` was never defined in the original code.
-	raise NotImplementedError('SISF is not implemented.')
-
-
-def SISF_M(K, v):
-	# @todo: not implemented; `output` was never defined in the original code.
-	raise NotImplementedError('SISF_M is not implemented.')
-
-
-def GIPF_tree(v_obj, K=1, alphabet=[0, 1]):
- if K == 1:
- n_graph = v_obj[0] + v_obj[1]
- D_T, father_idx = getDynamicTable(n_graph, alphabet)
-
- # get the vector the closest to v_obj.
- if v_obj not in D_T:
- print('no exact solution')
- dis_lim = 1 / len(v_obj) # the possible shortest distance.
- dis_min = 1.0 # minimum proportional distance
- v_min = v_obj
- for vc in D_T:
- if vc[0] + vc[1] == n_graph:
-# print(vc)
- dis = hamming(vc, v_obj)
- if dis < dis_min:
- dis_min = dis
- v_min = vc
- if dis_min <= dis_lim:
- break
- v_obj = v_min
-
- # obtain required graph by traceback procedure.
- return getObjectGraph(v_obj, D_T, father_idx, alphabet), v_obj
-
-def GIPF_M(K, v):
-	# @todo: not implemented; `G` was never defined in the original code.
-	raise NotImplementedError('GIPF_M is not implemented.')
-
-
-def getDynamicTable(n_graph, alphabet=[0, 1]):
- # init. When only one node exists.
- D_T = [(1, 0, 0, 0, 0, 0), (0, 1, 0, 0, 0, 0)]
- father_idx = [-1, -1] # index of each vector's father
- # add possible vectors.
- for idx, v in enumerate(D_T):
- if v[0] + v[1] < n_graph:
- D_T.append((v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5]))
- D_T.append((v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5]))
- D_T.append((v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5]))
- D_T.append((v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2))
- father_idx += [idx, idx, idx, idx]
-
-# D_T = itertools.chain([(1, 0, 0, 0, 0, 0)], [(0, 1, 0, 0, 0, 0)])
-# father_idx = itertools.chain([-1], [-1]) # index of each vector's father
-# # add possible vectors.
-# for idx, v in enumerate(D_T):
-# if v[0] + v[1] < n_graph:
-# D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5])])
-# D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5])])
-# D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5])])
-# D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2)])
-# father_idx = itertools.chain(father_idx, [idx, idx, idx, idx])
- return D_T, father_idx
-
-
-def getObjectGraph(v_obj, D_T, father_idx, alphabet=[0, 1]):
- g_obj = nx.Graph()
-
- # do vector traceback.
- v_tb = [list(v_obj)] # traceback vectors.
- v_tb_idx = [D_T.index(v_obj)] # indices of traceback vectors.
- while v_tb_idx[-1] > 1:
- idx_pre = father_idx[v_tb_idx[-1]]
- v_tb_idx.append(idx_pre)
- v_tb.append(list(D_T[idx_pre]))
- v_tb = v_tb[::-1] # reverse
-# v_tb_idx = v_tb_idx[::-1]
-
- # construct tree.
- v_c = v_tb[0] # current vector.
- if v_c[0] == 1:
- g_obj.add_node(0, node_label=alphabet[0])
- else:
- g_obj.add_node(0, node_label=alphabet[1])
- for vct in v_tb[1:]:
- if vct[0] - v_c[0] == 1:
- if vct[2] - v_c[2] == 2: # transfer 1
- label1 = alphabet[0]
- label2 = alphabet[0]
- else: # transfer 2
- label1 = alphabet[1]
- label2 = alphabet[0]
- else:
- if vct[3] - v_c[3] == 1: # transfer 3
- label1 = alphabet[0]
- label2 = alphabet[1]
- else: # transfer 4
- label1 = alphabet[1]
- label2 = alphabet[1]
- for nd, attr in g_obj.nodes(data=True):
- if attr['node_label'] == label1:
- nb_node = nx.number_of_nodes(g_obj)
- g_obj.add_node(nb_node, node_label=label2)
- g_obj.add_edge(nd, nb_node)
- break
- v_c = vct
- return g_obj
-
-
-import random
-def hierarchy_pos(G, root=None, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5):
-
- '''
- From Joel's answer at https://stackoverflow.com/a/29597209/2966723.
- Licensed under Creative Commons Attribution-Share Alike
-
- If the graph is a tree this will return the positions to plot this in a
- hierarchical layout.
-
- G: the graph (must be a tree)
-
- root: the root node of current branch
- - if the tree is directed and this is not given,
- the root will be found and used
- - if the tree is directed and this is given, then
- the positions will be just for the descendants of this node.
- - if the tree is undirected and not given,
- then a random choice will be used.
-
- width: horizontal space allocated for this branch - avoids overlap with other branches
-
- vert_gap: gap between levels of hierarchy
-
- vert_loc: vertical location of root
-
- xcenter: horizontal location of root
- '''
- if not nx.is_tree(G):
- raise TypeError('cannot use hierarchy_pos on a graph that is not a tree')
-
- if root is None:
- if isinstance(G, nx.DiGraph):
- root = next(iter(nx.topological_sort(G))) #allows back compatibility with nx version 1.11
- else:
- root = random.choice(list(G.nodes))
-
- def _hierarchy_pos(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5, pos = None, parent = None):
- '''
- see hierarchy_pos docstring for most arguments
-
- pos: a dict saying where all nodes go if they have been assigned
- parent: parent of this branch. - only affects it if non-directed
-
- '''
-
- if pos is None:
- pos = {root:(xcenter,vert_loc)}
- else:
- pos[root] = (xcenter, vert_loc)
- children = list(G.neighbors(root))
- if not isinstance(G, nx.DiGraph) and parent is not None:
- children.remove(parent)
- if len(children)!=0:
- dx = width/len(children)
- nextx = xcenter - width/2 - dx/2
- for child in children:
- nextx += dx
- pos = _hierarchy_pos(G,child, width = dx, vert_gap = vert_gap,
- vert_loc = vert_loc-vert_gap, xcenter=nextx,
- pos=pos, parent = root)
- return pos
-
-
- return _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter)
-
-
-if __name__ == '__main__':
- v_obj = (6, 4, 10, 3, 3, 2)
-# v_obj = (6, 5, 10, 3, 3, 2)
- tree_obj, v_obj = GIPF_tree(v_obj)
- print('One closest vector is', v_obj)
- # plot
- pos = hierarchy_pos(tree_obj, 0)
- node_labels = nx.get_node_attributes(tree_obj, 'node_label')
- nx.draw(tree_obj, pos=pos, labels=node_labels, with_labels=True)
\ No newline at end of file
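
When `GIPF_tree()` above cannot match the requested path-frequency vector exactly, it falls back to the nearest table entry under the Hamming distance among vectors with the same node count. A toy sketch of that step follows; the vectors are made up, and the interpretation of the six entries is inferred from `getDynamicTable()`:

```python
from scipy.spatial.distance import hamming

# v = (#nodes labeled 0, #nodes labeled 1, #paths 0-0, 0-1, 1-0, 1-1).
v_obj = (6, 4, 10, 3, 3, 2)
table = [(6, 4, 8, 4, 4, 2), (5, 5, 8, 4, 4, 2), (6, 4, 10, 3, 3, 2)]

n_graph = v_obj[0] + v_obj[1]
# only vectors with the same number of nodes are candidates.
candidates = [v for v in table if v[0] + v[1] == n_graph]
v_min = min(candidates, key=lambda v: hamming(v, v_obj))
print(v_min)  # (6, 4, 10, 3, 3, 2): the exact match wins with distance 0
```
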
diff --git a/gklearn/preimage/preimage_iam.py b/gklearn/preimage/preimage_iam.py
deleted file mode 100644
index bf79d0e..0000000
--- a/gklearn/preimage/preimage_iam.py
+++ /dev/null
@@ -1,705 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Apr 30 17:07:43 2019
-
-A graph pre-image method combining the iterative pre-image method of
-reference [1] with the iterative alternate minimization (IAM) of reference [2].
-@author: ljia
-@references:
-	[1] Gökhan H. Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
-	pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
- [2] Generalized median graph via iterative alternate minimization.
-"""
-import sys
-import numpy as np
-from tqdm import tqdm
-import networkx as nx
-import matplotlib.pyplot as plt
-import random
-
-from gklearn.preimage.iam import iam_upgraded
-from gklearn.preimage.utils import dis_gstar, compute_kernel
-
-
-def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
- gkernel, epsilon=0.001, InitIAMWithAllDk=False,
- params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
- 'ite_max': 50, 'epsilon': 0.001,
- 'removeNodes': True, 'connected': False},
- params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP',
- 'edit_cost_constant': [], 'stabilizer': 'min',
- 'repeat': 50}):
-	"""This function constructs a graph pre-image by the iterative pre-image
- framework in reference [1], algorithm 1, where the step of generating new
- graphs randomly is replaced by the IAM algorithm in reference [2].
-
- notes
- -----
- Every time a set of n better graphs is acquired, their distances in kernel space are
- compared with the k nearest ones, and the k nearest distances from the k+n
- distances will be used as the new ones.
- """
- # compute k nearest neighbors of phi in DN.
- dis_all = [] # distance between g_star and each graph.
- term3 = 0
- for i1, a1 in enumerate(alpha):
- for i2, a2 in enumerate(alpha):
- term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
- for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
- dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
- dis_all.append(dtemp)
-
- # sort
- sort_idx = np.argsort(dis_all)
- dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
-	nb_best = len(np.argwhere(np.array(dis_k) == dis_k[0]).flatten().tolist())
- ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
-	if dis_k[0] == 0: # the exact pre-image.
-		print('The exact pre-image is found from the input dataset.')
-		return 0, ghat_list, 0, 0, 0
- dhat = dis_k[0] # the nearest distance
-# for g in ghat_list:
-# draw_Letter_graph(g)
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
- Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-# for gi in Gk:
-# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
-## nx.draw_networkx(gi)
-# plt.show()
-## draw_Letter_graph(g)
-# print(gi.nodes(data=True))
-# print(gi.edges(data=True))
-
-# i = 1
- r = 0
- itr_total = 0
- dis_of_each_itr = [dhat]
- found = False
- nb_updated = 0
- nb_updated_k = 0
- while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
- print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
- print('Current preimage iteration =', r)
- print('Total preimage iteration =', itr_total, '\n')
- found = False
-
- Gn_nearest_median = [g.copy() for g in Gk]
- if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
- ghat_new_list = []
- for g_tmp in Gk:
- Gn_nearest_init = [g_tmp.copy()]
- ghat_new_list_tmp, _, _ = iam_upgraded(Gn_nearest_median,
- Gn_nearest_init, params_ged=params_ged, **params_iam)
- ghat_new_list += ghat_new_list_tmp
- else: # only the best graph in D_k is used to initialize IAM.
- Gn_nearest_init = [g.copy() for g in Gk]
- ghat_new_list, _, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
- params_ged=params_ged, **params_iam)
-
-# for g in g_tmp_list:
-# nx.draw_networkx(g)
-# plt.show()
-# draw_Letter_graph(g)
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- # compute distance between \psi and the new generated graphs.
- knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
- dhat_new_list = []
- for idx, g_tmp in enumerate(ghat_new_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
- len(ghat_new_list) + len(Gn_median) + 1),
- alpha, knew, withterm3=False))
-
- for idx_g, ghat_new in enumerate(ghat_new_list):
- dhat_new = dhat_new_list[idx_g]
-
- # if the new distance is smaller than the max of D_k.
- if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
- # check if the new distance is the same as one in D_k.
- is_duplicate = False
- for dis_tmp in dis_k[1:-1]:
- if np.abs(dhat_new - dis_tmp) < epsilon:
- is_duplicate = True
- print('IAM: duplicate k nearest graph generated.')
- break
- if not is_duplicate:
- if np.abs(dhat_new - dhat) < epsilon:
- print('IAM: I am equal!')
-# dhat = dhat_new
-# ghat_list = [ghat_new.copy()]
- else:
- print('IAM: we got better k nearest neighbors!')
- nb_updated_k += 1
- print('the k nearest neighbors are updated',
- nb_updated_k, 'times.')
-
- dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
- Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
- sort_idx = np.argsort(dis_k)
- dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
- Gk = [Gk[idx] for idx in sort_idx[0:k]]
- if dhat_new < dhat:
- print('IAM: I have smaller distance!')
- print(str(dhat) + '->' + str(dhat_new))
- dhat = dhat_new
- ghat_list = [Gk[0].copy()]
- r = 0
- nb_updated += 1
-
- print('the graph is updated', nb_updated, 'times.')
- nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'),
- with_labels=True)
- ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
- plt.show()
-
- found = True
- if not found:
- r += 1
-
- dis_of_each_itr.append(dhat)
- itr_total += 1
- print('\nthe k shortest distances are', dis_k)
- print('the shortest distances for previous iterations are', dis_of_each_itr)
-
- print('\n\nthe graph is updated', nb_updated, 'times.')
- print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
- print('distances in kernel space:', dis_of_each_itr, '\n')
-
- return dhat, ghat_list, dis_of_each_itr[-1], nb_updated, nb_updated_k
-
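
The distance that drives the search above is `dis_gstar()` from the deleted utils module; up to implementation details it evaluates d(g, g*)^2 = k(g, g) - 2*sum_i a_i*k(g, g_i) + term3, with term3 = sum_{i,j} a_i*a_j*k(g_i, g_j) precomputed once as in the function body. A toy sketch under that assumption, with a hypothetical helper name:

```python
import numpy as np

def dis_gstar_sketch(ig, idx_gi, alpha, K, term3):
    # distance in kernel space between graph ig and the weighted point g*.
    term1 = K[ig, ig]
    term2 = 2 * sum(a * K[ig, i] for a, i in zip(alpha, idx_gi))
    return np.sqrt(max(term1 - term2 + term3, 0.0))

# toy kernel matrix over 3 graphs; g* is the even mixture of graphs 1 and 2.
K = np.array([[1.0, 0.5, 0.2],
              [0.5, 1.0, 0.4],
              [0.2, 0.4, 1.0]])
alpha, idx_gi = [0.5, 0.5], [1, 2]
term3 = sum(a1 * a2 * K[i1, i2]
            for a1, i1 in zip(alpha, idx_gi)
            for a2, i2 in zip(alpha, idx_gi))
print(dis_gstar_sketch(0, idx_gi, alpha, K, term3))  # 1.0
```
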
-
-
-
-def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
- l_max, gkernel, epsilon=0.001,
- InitIAMWithAllDk=False, InitRandomWithAllDk=True,
- params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
- 'ite_max': 50, 'epsilon': 0.001,
- 'removeNodes': True, 'connected': False},
- params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1',
- 'method': 'IPFP', 'edit_cost_constant': [],
- 'stabilizer': 'min', 'repeat': 50}):
-	"""This function constructs a graph pre-image by the iterative pre-image
- framework in reference [1], algorithm 1, where new graphs are generated
- randomly and by the IAM algorithm in reference [2].
-
- notes
- -----
- Every time a set of n better graphs is acquired, their distances in kernel space are
- compared with the k nearest ones, and the k nearest distances from the k+n
- distances will be used as the new ones.
- """
- Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
- # compute k nearest neighbors of phi in DN.
- dis_all = [] # distance between g_star and each graph.
- term3 = 0
- for i1, a1 in enumerate(alpha):
- for i2, a2 in enumerate(alpha):
- term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
- for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
- dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
- dis_all.append(dtemp)
-
- # sort
- sort_idx = np.argsort(dis_all)
- dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
-	nb_best = len(np.argwhere(np.array(dis_k) == dis_k[0]).flatten().tolist())
- ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of psi in DN
-	if dis_k[0] == 0: # the exact pre-image.
-		print('The exact pre-image is found from the input dataset.')
-		return 0, ghat_list, 0, 0, 0, 0, 0
- dhat = dis_k[0] # the nearest distance
-# for g in ghat_list:
-# draw_Letter_graph(g)
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
- Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-# for gi in Gk:
-# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
-## nx.draw_networkx(gi)
-# plt.show()
-## draw_Letter_graph(g)
-# print(gi.nodes(data=True))
-# print(gi.edges(data=True))
-
- r = 0
- itr_total = 0
- dis_of_each_itr = [dhat]
- nb_updated_iam = 0
- nb_updated_k_iam = 0
- nb_updated_random = 0
- nb_updated_k_random = 0
-# is_iam_duplicate = False
- while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
- print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
- print('Current preimage iteration =', r)
- print('Total preimage iteration =', itr_total, '\n')
- found_iam = False
-
- Gn_nearest_median = [g.copy() for g in Gk]
- if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
- ghat_new_list = []
- for g_tmp in Gk:
- Gn_nearest_init = [g_tmp.copy()]
- ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median,
- Gn_nearest_init, params_ged=params_ged, **params_iam)
- ghat_new_list += ghat_new_list_tmp
- else: # only the best graph in D_k is used to initialize IAM.
- Gn_nearest_init = [g.copy() for g in Gk]
- ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
- params_ged=params_ged, **params_iam)
-
-# for g in g_tmp_list:
-# nx.draw_networkx(g)
-# plt.show()
-# draw_Letter_graph(g)
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- # compute distance between \psi and the new generated graphs.
- knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
- dhat_new_list = []
-
- for idx, g_tmp in enumerate(ghat_new_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
- len(ghat_new_list) + len(Gn_median) + 1),
- alpha, knew, withterm3=False))
-
- # find the new k nearest graphs.
- for idx_g, ghat_new in enumerate(ghat_new_list):
- dhat_new = dhat_new_list[idx_g]
-
- # if the new distance is smaller than the max of D_k.
- if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
- # check if the new distance is the same as one in D_k.
- is_duplicate = False
- for dis_tmp in dis_k[1:-1]:
- if np.abs(dhat_new - dis_tmp) < epsilon:
- is_duplicate = True
- print('IAM: duplicate k nearest graph generated.')
- break
- if not is_duplicate:
- if np.abs(dhat_new - dhat) < epsilon:
- print('IAM: I am equal!')
-# dhat = dhat_new
-# ghat_list = [ghat_new.copy()]
- else:
- print('IAM: we got better k nearest neighbors!')
- nb_updated_k_iam += 1
- print('the k nearest neighbors are updated',
- nb_updated_k_iam, 'times.')
-
- dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
- Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
- sort_idx = np.argsort(dis_k)
- dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
- Gk = [Gk[idx] for idx in sort_idx[0:k]]
- if dhat_new < dhat:
- print('IAM: I have smaller distance!')
- print(str(dhat) + '->' + str(dhat_new))
- dhat = dhat_new
- ghat_list = [Gk[0].copy()]
- r = 0
- nb_updated_iam += 1
-
- print('the graph is updated by IAM', nb_updated_iam,
- 'times.')
- nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'),
- with_labels=True)
- ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
- plt.show()
-
- found_iam = True
-
- # when new distance is not smaller than the max of D_k, use random generation.
- if not found_iam:
- print('Distance not better, switching to random generation now.')
- print(str(dhat) + '->' + str(dhat_new))
-
- if InitRandomWithAllDk: # use all k nearest graphs as the initials.
- init_list = [g_init.copy() for g_init in Gk]
- else: # use just the nearest graph as the initial.
- init_list = [Gk[0].copy()]
-
- # number of edges to be changed.
- if len(init_list) == 1:
-				# @todo what if the log is negative? how to choose alpha (scalar)? seems fdgs is always 1.
- # fdgs = dhat_new
- fdgs = nb_updated_random + 1
- if fdgs < 1:
- fdgs = 1
- fdgs = int(np.ceil(np.log(fdgs)))
- if fdgs < 1:
- fdgs += 1
- # fdgs = nb_updated_random + 1 # @todo:
- fdgs_list = [fdgs]
- else:
-				# @todo what if the log is negative? how to choose alpha (scalar)?
- fdgs_list = np.array(dis_k[:])
- if np.min(fdgs_list) < 1:
- fdgs_list /= dis_k[0]
- fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
- if np.min(fdgs_list) < 1:
- fdgs_list = np.array(fdgs_list) + 1
-
- l = 0
- found_random = False
- while l < l_max and not found_random:
- for idx_g, g_tmp in enumerate(init_list):
- # add and delete edges.
- ghat_new = nx.convert_node_labels_to_integers(g_tmp.copy())
- # @todo: should we use just half of the adjacency matrix for undirected graphs?
- nb_vpairs = nx.number_of_nodes(ghat_new) * (nx.number_of_nodes(ghat_new) - 1)
- np.random.seed()
- # which edges to change.
- # @todo: what if fdgs is bigger than nb_vpairs?
- idx_change = random.sample(range(nb_vpairs), fdgs_list[idx_g] if
- fdgs_list[idx_g] < nb_vpairs else nb_vpairs)
-# idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
-# (nx.number_of_nodes(gs) - 1), fdgs)
- for item in idx_change:
- node1 = int(item / (nx.number_of_nodes(ghat_new) - 1))
- node2 = (item - node1 * (nx.number_of_nodes(ghat_new) - 1))
- if node2 >= node1: # skip the self pair.
- node2 += 1
- # @todo: is the randomness correct?
- if not ghat_new.has_edge(node1, node2):
- ghat_new.add_edge(node1, node2)
- # nx.draw_networkx(gs)
- # plt.show()
- # nx.draw_networkx(ghat_new)
- # plt.show()
- else:
- ghat_new.remove_edge(node1, node2)
- # nx.draw_networkx(gs)
- # plt.show()
- # nx.draw_networkx(ghat_new)
- # plt.show()
- # nx.draw_networkx(ghat_new)
- # plt.show()
-
- # compute distance between \psi and the new generated graph.
- knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
- dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1),
- alpha, knew, withterm3=False)
- # @todo: the new distance is smaller or also equal?
- if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
- # check if the new distance is the same as one in D_k.
- is_duplicate = False
- for dis_tmp in dis_k[1:-1]:
- if np.abs(dhat_new - dis_tmp) < epsilon:
- is_duplicate = True
- print('Random: duplicate k nearest graph generated.')
- break
- if not is_duplicate:
- if np.abs(dhat_new - dhat) < epsilon:
- print('Random: I am equal!')
- # dhat = dhat_new
- # ghat_list = [ghat_new.copy()]
- else:
- print('Random: we got better k nearest neighbors!')
- print('l =', str(l))
- nb_updated_k_random += 1
- print('the k nearest neighbors are updated by random generation',
- nb_updated_k_random, 'times.')
-
- dis_k = [dhat_new] + dis_k # add the new nearest distances.
- Gk = [ghat_new.copy()] + Gk # add the corresponding graphs.
- sort_idx = np.argsort(dis_k)
- dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
- Gk = [Gk[idx] for idx in sort_idx[0:k]]
- if dhat_new < dhat:
- print('\nRandom: I am smaller!')
- print('l =', str(l))
- print(dhat, '->', dhat_new)
- dhat = dhat_new
- ghat_list = [ghat_new.copy()]
- r = 0
- nb_updated_random += 1
-
- print('the graph is updated by random generation',
- nb_updated_random, 'times.')
-
- nx.draw(ghat_new, labels=nx.get_node_attributes(ghat_new, 'atom'),
- with_labels=True)
- ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
- plt.show()
- found_random = True
- break
- l += 1
- if not found_random: # l == l_max:
- r += 1
-
- dis_of_each_itr.append(dhat)
- itr_total += 1
- print('\nthe k shortest distances are', dis_k)
- print('the shortest distances for previous iterations are', dis_of_each_itr)
-
- print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
- nb_updated_random, 'times.')
- print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
- 'times, and by random generation', nb_updated_k_random, 'times.')
- print('distances in kernel space:', dis_of_each_itr, '\n')
-
- return dhat, ghat_list, dis_of_each_itr[-1], \
- nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random
-
-
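
The random-generation branch above flips `fdgs` edges chosen among the n*(n-1) ordered vertex pairs, decoding each sampled index into a pair while skipping self-loops. A self-contained sketch of that perturbation on a toy graph, with a hypothetical helper name:

```python
import random
import networkx as nx

def perturb_edges(g, fdgs, seed=None):
    """Toggle fdgs randomly chosen edges, decoding pair indices as above."""
    g = nx.convert_node_labels_to_integers(g.copy())
    n = nx.number_of_nodes(g)
    random.seed(seed)
    nb_vpairs = n * (n - 1)  # ordered pairs; self-pairs excluded by decoding.
    for item in random.sample(range(nb_vpairs), min(fdgs, nb_vpairs)):
        node1 = item // (n - 1)
        node2 = item - node1 * (n - 1)
        if node2 >= node1:  # skip the self pair.
            node2 += 1
        if g.has_edge(node1, node2):
            g.remove_edge(node1, node2)
        else:
            g.add_edge(node1, node2)
    return g

print(sorted(perturb_edges(nx.path_graph(4), 2, seed=0).edges()))
```
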
-###############################################################################
-# Old implementations.
-
-#def gk_iam(Gn, alpha):
-# """This function constructs graph pre-image by the iterative pre-image
-# framework in reference [1], algorithm 1, where the step of generating new
-# graphs randomly is replaced by the IAM algorithm in reference [2].
-#
-# notes
-# -----
-# Every time a better graph is acquired, the older one is replaced by it.
-# """
-# pass
-# # compute k nearest neighbors of phi in DN.
-# dis_list = [] # distance between g_star and each graph.
-# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
-# dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
-# k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
-# (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
-# k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
-# dis_list.append(dtemp)
-#
-# # sort
-# sort_idx = np.argsort(dis_list)
-# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
-# g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
-# if dis_gs[0] == 0: # the exact pre-image.
-# print('The exact pre-image is found from the input dataset.')
-# return 0, g0hat
-# dhat = dis_gs[0] # the nearest distance
-# Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
-# gihat_list = []
-#
-## i = 1
-# r = 1
-# while r < r_max:
-# print('r =', r)
-## found = False
-# Gs_nearest = Gk + gihat_list
-# g_tmp = iam(Gs_nearest)
-#
-# # compute distance between \psi and the new generated graph.
-# knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
-# p_quit=lmbda, n_iteration=20, remove_totters=False,
-# n_jobs=multiprocessing.cpu_count(), verbose=False)
-# dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
-# knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
-# (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
-# k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
-# if dnew <= dhat: # the new distance is smaller
-# print('I am smaller!')
-# dhat = dnew
-# g_new = g_tmp.copy() # found better graph.
-# gihat_list = [g_new]
-# dis_gs.append(dhat)
-# r = 0
-# else:
-# r += 1
-#
-# ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
-#
-# return dhat, ghat
-
-
-#def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
-# """This function constructs graph pre-image by the iterative pre-image
-# framework in reference [1], algorithm 1, where the step of generating new
-# graphs randomly is replaced by the IAM algorithm in reference [2].
-#
-# notes
-# -----
-# Every time a better graph is acquired, its distance in kernel space is
-# compared with the k nearest ones, and the k nearest distances from the k+1
-# distances will be used as the new ones.
-# """
-# # compute k nearest neighbors of phi in DN.
-# dis_list = [] # distance between g_star and each graph.
-# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
-# dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
-## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
-## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
-## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
-## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
-# dis_list.append(dtemp)
-#
-# # sort
-# sort_idx = np.argsort(dis_list)
-# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
-# g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
-# if dis_gs[0] == 0: # the exact pre-image.
-# print('The exact pre-image is found from the input dataset.')
-# return 0, g0hat
-# dhat = dis_gs[0] # the nearest distance
-# ghat = g0hat.copy()
-# Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-# for gi in Gk:
-# nx.draw_networkx(gi)
-# plt.show()
-# print(gi.nodes(data=True))
-# print(gi.edges(data=True))
-# Gs_nearest = Gk.copy()
-## gihat_list = []
-#
-## i = 1
-# r = 1
-# while r < r_max:
-# print('r =', r)
-## found = False
-## Gs_nearest = Gk + gihat_list
-## g_tmp = iam(Gs_nearest)
-# g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
-# nx.draw_networkx(g_tmp)
-# plt.show()
-# print(g_tmp.nodes(data=True))
-# print(g_tmp.edges(data=True))
-#
-# # compute distance between \psi and the new generated graph.
-# gi_list = [Gn[i] for i in idx_gi]
-# knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
-# dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
-#
-## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
-## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
-## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
-## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
-# if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
-# print('I am smaller!')
-# print(str(dhat) + '->' + str(dnew))
-## nx.draw_networkx(ghat)
-## plt.show()
-## print('->')
-## nx.draw_networkx(g_tmp)
-## plt.show()
-#
-# dhat = dnew
-# g_new = g_tmp.copy() # found better graph.
-# ghat = g_tmp.copy()
-# dis_gs.append(dhat) # add the new nearest distance.
-# Gs_nearest.append(g_new) # add the corresponding graph.
-# sort_idx = np.argsort(dis_gs)
-# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
-# r = 0
-# else:
-# r += 1
-#
-# return dhat, ghat
-
-
-#def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
-#    """This function constructs a graph pre-image using the iterative pre-image
-#    framework of reference [1], algorithm 1, where the step of generating new
-#    graphs randomly is replaced by the IAM algorithm of reference [2].
-#
-#    notes
-#    -----
-#    Every time a set of n better graphs is acquired, their distances in kernel
-#    space are compared with the k current nearest ones, and the k smallest of
-#    these k+n distances are kept as the new nearest set.
-# """
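-#    # Same idea as in gk_iam_nearest, generalized to n candidates: the n new
-#    # distances are merged with the current k and only the k smallest survive
-#    # (see the np.argsort block below).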
-# Gn_median = [Gn[idx].copy() for idx in idx_gi]
-# # compute k nearest neighbors of phi in DN.
-# dis_list = [] # distance between g_star and each graph.
-# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
-# dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
-## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
-## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
-## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
-## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
-# dis_list.append(dtemp)
-#
-# # sort
-# sort_idx = np.argsort(dis_list)
-# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
-# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
-# g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
-# if dis_gs[0] == 0: # the exact pre-image.
-# print('The exact pre-image is found from the input dataset.')
-# return 0, g0hat_list
-# dhat = dis_gs[0] # the nearest distance
-# ghat_list = [g.copy() for g in g0hat_list]
-# for g in ghat_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-# Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-# for gi in Gk:
-# nx.draw_networkx(gi)
-# plt.show()
-# print(gi.nodes(data=True))
-# print(gi.edges(data=True))
-# Gs_nearest = Gk.copy()
-## gihat_list = []
-#
-## i = 1
-# r = 1
-# while r < r_max:
-# print('r =', r)
-## found = False
-## Gs_nearest = Gk + gihat_list
-## g_tmp = iam(Gs_nearest)
-# g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
-# Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
-# for g in g_tmp_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-#
-# # compute distance between \psi and the new generated graphs.
-# gi_list = [Gn[i] for i in idx_gi]
-# knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
-# dnew_list = []
-# for idx, g_tmp in enumerate(g_tmp_list):
-# dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
-# len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
-#
-## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
-## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
-## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
-## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
-#
-# # find the new k nearest graphs.
-# dis_gs = dnew_list + dis_gs # add the new nearest distances.
-# Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
-# sort_idx = np.argsort(dis_gs)
-# if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
-# print('We got better k nearest neighbors! Hurray!')
-# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-# print(dis_gs[-1])
-# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
-# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
-# if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
-# print('I have smaller or equal distance!')
-# dhat = dis_gs[0]
-# print(str(dhat) + '->' + str(dhat))
-# idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
-# ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
-# for g in ghat_list:
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-# r = 0
-# else:
-# r += 1
-#
-# return dhat, ghat_list
\ No newline at end of file
diff --git a/gklearn/preimage/preimage_random.py b/gklearn/preimage/preimage_random.py
deleted file mode 100644
index e5f74cd..0000000
--- a/gklearn/preimage/preimage_random.py
+++ /dev/null
@@ -1,309 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Mar 6 16:03:11 2019
-
-pre-image
-@author: ljia
-"""
-
-import sys
-import numpy as np
-import random
-from tqdm import tqdm
-import networkx as nx
-import matplotlib.pyplot as plt
-
-from gklearn.preimage.utils import compute_kernel, dis_gstar
-
-
-def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
- Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
-
- # compute k nearest neighbors of phi in DN.
- dis_list = [] # distance between g_star and each graph.
- term3 = 0
- for i1, a1 in enumerate(alpha):
- for i2, a2 in enumerate(alpha):
- term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
- for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
- dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
- dis_list.append(dtemp)
-# print(np.max(dis_list))
-# print(np.min(dis_list))
-# print(np.min([item for item in dis_list if item != 0]))
-# print(np.mean(dis_list))
-
- # sort
- sort_idx = np.argsort(dis_list)
- dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
-    nb_best = len(np.argwhere(np.array(dis_gs) == dis_gs[0]).flatten().tolist()) # np.array so the comparison is elementwise.
- g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
- if dis_gs[0] == 0: # the exact pre-image.
- print('The exact pre-image is found from the input dataset.')
- return 0, g0hat_list[0], 0
- dhat = dis_gs[0] # the nearest distance
-# ghat_list = [g.copy() for g in g0hat_list]
-# for g in ghat_list:
-# draw_Letter_graph(g)
-# nx.draw_networkx(g)
-# plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
- Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-# for gi in Gk:
-## nx.draw_networkx(gi)
-## plt.show()
-# draw_Letter_graph(g)
-# print(gi.nodes(data=True))
-# print(gi.edges(data=True))
- Gs_nearest = [g.copy() for g in Gk]
- gihat_list = []
- dihat_list = []
-
-# i = 1
- r = 0
-# sod_list = [dhat]
-# found = False
- dis_of_each_itr = [dhat]
- nb_updated = 0
- g_best = []
- while r < r_max:
- print('\nr =', r)
- print('itr for gk =', nb_updated, '\n')
- found = False
- dis_bests = dis_gs + dihat_list
-        # @todo: what if the log is negative? how to choose alpha (scalar)?
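-        # What the lines below implement: rescale the distances so the smallest
-        # is at least 1 (when needed), take ceil(log(.)) of each as the number
-        # of edge edits to try on that candidate, and raise all counts by one
-        # if the smallest count is still below one.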
- fdgs_list = np.array(dis_bests)
- if np.min(fdgs_list) < 1:
- fdgs_list /= np.min(dis_bests)
- fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
- if np.min(fdgs_list) < 1:
- fdgs_list = np.array(fdgs_list) + 1
-
- for ig, gs in enumerate(Gs_nearest + gihat_list):
-# nx.draw_networkx(gs)
-# plt.show()
-            for trial in range(0, l):
-#            for trial in tqdm(range(0, l), desc='l loops', file=sys.stdout):
- # add and delete edges.
- gtemp = gs.copy()
- np.random.seed()
- # which edges to change.
- # @todo: should we use just half of the adjacency matrix for undirected graphs?
- nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
- # @todo: what if fdgs is bigger than nb_vpairs?
- idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if
- fdgs_list[ig] < nb_vpairs else nb_vpairs)
-# idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
-# (nx.number_of_nodes(gs) - 1), fdgs)
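-                # Each index in [0, n*(n-1)) encodes an ordered vertex pair:
-                # node1 = item // (n - 1), node2 = the remainder, shifted past
-                # node1 so the self pair (node1, node1) is never produced; the
-                # edge between the pair is then toggled.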
- for item in idx_change:
- node1 = int(item / (nx.number_of_nodes(gs) - 1))
- node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
- if node2 >= node1: # skip the self pair.
- node2 += 1
- # @todo: is the randomness correct?
- if not gtemp.has_edge(node1, node2):
- gtemp.add_edge(node1, node2)
-# nx.draw_networkx(gs)
-# plt.show()
-# nx.draw_networkx(gtemp)
-# plt.show()
- else:
- gtemp.remove_edge(node1, node2)
-# nx.draw_networkx(gs)
-# plt.show()
-# nx.draw_networkx(gtemp)
-# plt.show()
-# nx.draw_networkx(gtemp)
-# plt.show()
-
- # compute distance between \psi and the new generated graph.
-# knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
-# p_quit=lmbda, n_iteration=20, remove_totters=False,
-# n_jobs=multiprocessing.cpu_count(), verbose=False)
- knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
- dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew,
- withterm3=False)
- if dnew <= dhat: # @todo: the new distance is smaller or also equal?
- if dnew < dhat:
- print('\nI am smaller!')
-                        print('ig =', str(ig), ', l =', str(trial))
- print(dhat, '->', dnew)
- nb_updated += 1
- elif dnew == dhat:
- print('I am equal!')
-# nx.draw_networkx(gtemp)
-# plt.show()
-# print(gtemp.nodes(data=True))
-# print(gtemp.edges(data=True))
- dhat = dnew
- gnew = gtemp.copy()
- found = True # found better graph.
- if found:
- r = 0
- gihat_list = [gnew]
- dihat_list = [dhat]
- else:
- r += 1
-
- dis_of_each_itr.append(dhat)
- print('the shortest distances for previous iterations are', dis_of_each_itr)
-# dis_best.append(dhat)
- g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
- print('distances in kernel space:', dis_of_each_itr, '\n')
-
- return dhat, g_best, nb_updated
-# return 0, 0, 0
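-
-# A minimal usage sketch (hypothetical values; Kmatrix is the precomputed kernel
-# matrix over Gn_init and idx_gi gives the indices of the median graphs in it):
-#     dhat, g_best, nb_updated = preimage_random(Gn, [Gn[0], Gn[6]], [0.5, 0.5],
-#                                                [0, 6], Kmatrix, k=10, r_max=3,
-#                                                l=500, gkernel='untilhpathkernel')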
-
-
-if __name__ == '__main__':
- from gklearn.utils.graphfiles import loadDataset
-
-# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-# 'extra_params': {}} # node/edge symb
- ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
- 'extra_params': {}} # node nsymb
-# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-# 'extra_params': {}}
-# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-# 'extra_params': {}} # node symb
-
- DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
- #DN = DN[0:10]
-
-    lmbda = 0.03 # termination probability
- r_max = 3 # 10 # iteration limit.
- l = 500
- alpha_range = np.linspace(0.5, 0.5, 1)
- #alpha_range = np.linspace(0.1, 0.9, 9)
- k = 10 # 5 # k nearest neighbors
-
- # randomly select two molecules
- #np.random.seed(1)
- #idx1, idx2 = np.random.randint(0, len(DN), 2)
- #g1 = DN[idx1]
- #g2 = DN[idx2]
- idx1 = 0
- idx2 = 6
- g1 = DN[idx1]
- g2 = DN[idx2]
-
- # compute
- k_list = [] # kernel between each graph and itself.
- k_g1_list = [] # kernel between each graph and g1
- k_g2_list = [] # kernel between each graph and g2
- for ig, g in tqdm(enumerate(DN), desc='computing self kernels', file=sys.stdout):
- # ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None,
- # p_quit=lmbda, n_iteration=20, remove_totters=False,
- # n_jobs=multiprocessing.cpu_count(), verbose=False)
- ktemp = compute_kernel([g, g1, g2], 'untilhpathkernel', verbose=False)
- k_list.append(ktemp[0, 0])
- k_g1_list.append(ktemp[0, 1])
- k_g2_list.append(ktemp[0, 2])
-
- g_best = []
- dis_best = []
- # for each alpha
- for alpha in alpha_range:
- print('alpha =', alpha)
- # compute k nearest neighbors of phi in DN.
- dis_list = [] # distance between g_star and each graph.
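-        # Each dtemp below expands the squared kernel-space distance to
-        # psi = alpha * phi(g1) + (1 - alpha) * phi(g2) via the kernel trick;
-        # the sqrt turns it into the kernel-space distance.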
- for ig, g in tqdm(enumerate(DN), desc='computing distances', file=sys.stdout):
- dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
- k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
- (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
- k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
- dis_list.append(np.sqrt(dtemp))
-
- # sort
- sort_idx = np.argsort(dis_list)
- dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
- g0hat = DN[sort_idx[0]] # the nearest neighbor of phi in DN
- if dis_gs[0] == 0: # the exact pre-image.
- print('The exact pre-image is found from the input dataset.')
- g_pimg = g0hat
- break
- dhat = dis_gs[0] # the nearest distance
- Dk = [DN[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
- gihat_list = []
-
- i = 1
- r = 1
- while r < r_max:
- print('r =', r)
- found = False
- for ig, gs in enumerate(Dk + gihat_list):
- # nx.draw_networkx(gs)
- # plt.show()
-                # @todo: what if the log is negative?
-                fdgs = int(np.abs(np.ceil(np.log(alpha * dis_gs[ig]))))
-                for trial in tqdm(range(0, l), desc='l loop', file=sys.stdout):
- # add and delete edges.
- gtemp = gs.copy()
- np.random.seed()
- # which edges to change.
- # @todo: should we use just half of the adjacency matrix for undirected graphs?
- nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
- # @todo: what if fdgs is bigger than nb_vpairs?
- idx_change = random.sample(range(nb_vpairs), fdgs if fdgs < nb_vpairs else nb_vpairs)
- # idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
- # (nx.number_of_nodes(gs) - 1), fdgs)
- for item in idx_change:
- node1 = int(item / (nx.number_of_nodes(gs) - 1))
- node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
- if node2 >= node1: # skip the self pair.
- node2 += 1
- # @todo: is the randomness correct?
- if not gtemp.has_edge(node1, node2):
- # @todo: how to update the bond_type? 0 or 1?
- gtemp.add_edges_from([(node1, node2, {'bond_type': 1})])
- # nx.draw_networkx(gs)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
- else:
- gtemp.remove_edge(node1, node2)
- # nx.draw_networkx(gs)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
-
- # compute distance between phi and the new generated graph.
- # knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
- # p_quit=lmbda, n_iteration=20, remove_totters=False,
- # n_jobs=multiprocessing.cpu_count(), verbose=False)
- knew = compute_kernel([gtemp, g1, g2], 'untilhpathkernel', verbose=False)
- dnew = np.sqrt(knew[0, 0] - 2 * (alpha * knew[0, 1] + (1 - alpha) *
- knew[0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
- (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
- k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]))
- if dnew < dhat: # @todo: the new distance is smaller or also equal?
- print('I am smaller!')
- print(dhat, '->', dnew)
- nx.draw_networkx(gtemp)
- plt.show()
- print(gtemp.nodes(data=True))
- print(gtemp.edges(data=True))
- dhat = dnew
- gnew = gtemp.copy()
- found = True # found better graph.
- r = 0
- elif dnew == dhat:
- print('I am equal!')
- if found:
- gihat_list = [gnew]
- dis_gs.append(dhat)
- else:
- r += 1
- dis_best.append(dhat)
- g_best += ([g0hat] if len(gihat_list) == 0 else gihat_list)
-
-
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_best[idx])
- print('the corresponding pre-image is')
- nx.draw_networkx(g_best[idx])
- plt.show()
\ No newline at end of file
diff --git a/gklearn/preimage/python_code.py b/gklearn/preimage/python_code.py
deleted file mode 100644
index 3772526..0000000
--- a/gklearn/preimage/python_code.py
+++ /dev/null
@@ -1,122 +0,0 @@
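-# Note: this fragment ports C++ option-parsing code (GEDLIB-style) to Python.
-# It continues an if/elif chain over (opt_name, opt_val) pairs and assumes the
-# enclosing module defines the Error exception class, so it is not runnable on
-# its own.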
-elif opt_name == 'random-inits':
-    try:
-        num_random_inits_ = int(opt_val)
-        desired_num_random_inits_ = num_random_inits_
-    except ValueError:
-        raise Error('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits ]"')
-
-    if num_random_inits_ <= 0:
-        raise Error('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits ]"')
-
-elif opt_name == 'randomness':
-    if opt_val == 'PSEUDO':
-        use_real_randomness_ = False
-    elif opt_val == 'REAL':
-        use_real_randomness_ = True
-    else:
-        raise Error('Invalid argument "' + opt_val + '" for option randomness. Usage: options = "[--randomness REAL|PSEUDO] [...]"')
-
-elif opt_name == 'stdout':
-    if opt_val == '0':
-        print_to_stdout_ = 0
-    elif opt_val == '1':
-        print_to_stdout_ = 1
-    elif opt_val == '2':
-        print_to_stdout_ = 2
-    else:
-        raise Error('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
-
-elif opt_name == 'refine':
-    if opt_val == 'TRUE':
-        refine_ = True
-    elif opt_val == 'FALSE':
-        refine_ = False
-    else:
-        raise Error('Invalid argument "' + opt_val + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"')
-
-elif opt_name == 'time-limit':
-    try:
-        time_limit_in_sec_ = float(opt_val)
-    except ValueError:
-        raise Error('Invalid argument "' + opt_val + '" for option time-limit. Usage: options = "[--time-limit ] [...]')
-
-elif opt_name == 'max-itrs':
-    try:
-        max_itrs_ = int(opt_val)
-    except ValueError:
-        raise Error('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs ] [...]')
-
-elif opt_name == 'max-itrs-without-update':
-    try:
-        max_itrs_without_update_ = int(opt_val)
-    except ValueError:
-        raise Error('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update ] [...]')
-
-elif opt_name == 'seed':
-    try:
-        seed_ = int(opt_val)
-    except ValueError:
-        raise Error('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed ] [...]')
-
-elif opt_name == 'epsilon':
-    try:
-        epsilon_ = float(opt_val)
-    except ValueError:
-        raise Error('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon ] [...]')
-
-    if epsilon_ <= 0:
-        raise Error('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon ] [...]')
-
-elif opt_name == 'inits-increase-order':
-    try:
-        num_inits_increase_order_ = int(opt_val)
-    except ValueError:
-        raise Error('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order ]"')
-
-    if num_inits_increase_order_ <= 0:
-        raise Error('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order ]"')
-
-elif opt_name == 'init-type-increase-order':
-    init_type_increase_order_ = opt_val
-    if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++':
-        raise Exception('Invalid argument ' + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"')
-
-elif opt_name == 'max-itrs-increase-order':
-    try:
-        max_itrs_increase_order_ = int(opt_val)
-    except ValueError:
-        raise Error('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order ] [...]')
-
-else:
-    valid_options = '[--init-type ] [--random-inits ] [--randomness ] [--seed ] [--stdout ] '
-    valid_options += '[--time-limit ] [--max-itrs ] [--epsilon ] '
-    valid_options += '[--inits-increase-order ] [--init-type-increase-order ] [--max-itrs-increase-order ]'
-    raise Error('Invalid option "' + opt_name + '". Usage: options = "' + valid_options + '"')
-
diff --git a/gklearn/preimage/test.py b/gklearn/preimage/test.py
deleted file mode 100644
index 4110a6f..0000000
--- a/gklearn/preimage/test.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/gedlibpy/lib/fann/:/export/home/lambertn/Documents/gedlibpy/lib/libsvm.3.22:/export/home/lambertn/Documents/gedlibpy/lib/nomad
-
-#So that "import script" finds the libraries GedLib needs
-#Equivalent to setting the LD_LIBRARY_PATH environment variable in a bash shell
-import gedlibpy.librariesImport
-from gedlibpy import gedlibpy
-import networkx as nx
-
-
-def init():
-    print("List of Edit Cost Options : ")
-    for i in gedlibpy.list_of_edit_cost_options:
-        print(i)
-    print("")
-
-    print("List of Method Options : ")
-    for j in gedlibpy.list_of_method_options:
-        print(j)
-    print("")
-
-    print("List of Init Options : ")
-    for k in gedlibpy.list_of_init_options:
-        print(k)
-    print("")
-
-def test():
-
- gedlibpy.load_GXL_graphs('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml')
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost("CHEM_1")
- gedlibpy.init()
- gedlibpy.set_method("IPFP", "")
- gedlibpy.init_method()
- g = listID[0]
- h = listID[1]
- gedlibpy.run_method(g, h)
-    print("Node Map : ", gedlibpy.get_node_map(g, h))
-    print("Forward map : ", gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
-    print("Assignment Matrix : ")
-    print(gedlibpy.get_assignment_matrix(g, h))
-    print("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))
-
-
-def convertGraph(G):
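-    """Relabel a gklearn graph for gedlibpy: string node ids, node attribute
-    'atom' mapped to 'chem', edge attribute 'bond_type' mapped to 'valence'.
-    """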
- G_new = nx.Graph()
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), chem=attrs['atom'])
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-
- return G_new
-
-
-def testNxGraph():
- from gklearn.utils.graphfiles import loadDataset
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-
- gedlibpy.restart_env()
- for graph in Gn:
- g_new = convertGraph(graph)
- gedlibpy.add_nx_graph(g_new, "")
-
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost("CHEM_1")
- gedlibpy.init()
- gedlibpy.set_method("IPFP", "")
- gedlibpy.init_method()
-
- print(listID)
- g = listID[0]
- h = listID[1]
-
- gedlibpy.run_method(g, h)
-
-    print("Node Map : ", gedlibpy.get_node_map(g, h))
-    print("Forward map : ", gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
-    print("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))
-
-#test()
-init()
-#testNxGraph()
diff --git a/gklearn/preimage/test_fitDistance.py b/gklearn/preimage/test_fitDistance.py
deleted file mode 100644
index 2945a24..0000000
--- a/gklearn/preimage/test_fitDistance.py
+++ /dev/null
@@ -1,648 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 24 11:50:56 2019
-
-@author: ljia
-"""
-import sys
-import os
-
-from matplotlib import pyplot as plt
-import numpy as np
-from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.utils import remove_edges
-from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
-from gklearn.preimage.utils import normalize_distance_matrix
-
-
-    from gklearn.preimage.fitDistance import update_costs
- from preimage.fitDistance import update_costs
- import cvxpy as cp
-
- ds = np.load('results/xp_fit_method/fit_data_debug4.gm.npz')
- nb_cost_mat = ds['nb_cost_mat']
- dis_k_vec = ds['dis_k_vec']
- n_edit_operations = ds['n_edit_operations']
- ged_vec_init = ds['ged_vec_init']
- ged_mat = ds['ged_mat']
-
- nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
- x = cp.Variable(nb_cost_mat_new.shape[1])
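-    # Constrained least squares: find edit costs x >= 0 that minimize
-    # ||nb_cost_mat_new @ x - dis_k_vec||^2, i.e. make the GED predicted from
-    # the operation counts match the kernel distances as closely as possible.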
- cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-# constraints = [x >= [0.000 for i in range(nb_cost_mat_new.shape[1])],
-# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-# constraints = [x >= [0.000 for i in range(nb_cost_mat_new.shape[1])],
-# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
-# np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
- constraints = [x >= [0.00 for i in range(nb_cost_mat_new.shape[1])],
- np.array([0.0, 1.0, -1.0]).T@x == 0.0]
-# constraints = [x >= [0.00000 for i in range(nb_cost_mat_new.shape[1])]]
- prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- print(x.value)
- edit_costs_new = np.concatenate((x.value, np.array([0.0])))
- residual = np.sqrt(prob.value)
-
-
-def median_paper_clcpc_python_best():
-    """Fit edit costs under the constraints c_vs <= c_vi + c_vr and
-    c_es <= c_ei + c_er, with GED computed by Python invoking the C++ code
-    through a bash command (with the updated library).
-    """
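-    # These constraints keep a substitution no more expensive than a removal
-    # plus an insertion, so optimal edit paths remain free to substitute.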
-# ds = {'name': 'monoterpenoides',
-# 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
-# _, y_all = loadDataset(ds['dataset'])
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- itr_max = 6
- algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
- 'algo_options': algo_options, 'stabilizer': None}
-
- y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
- repeats = 50
- collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
- graph_dir = collection_path + 'gxl/'
-
- fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt'
-
- for y in y_all:
- for repeat in range(repeats):
- edit_costs_output_file = open(fn_edit_costs_output, 'a')
- collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
- Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
- edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
- nb_cost_mat_list = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
- gkernel, itr_max, params_ged=params_ged,
- parallel=True)
- total_time = np.sum(time_list)
-# print('\nedit_costs:', edit_costs)
-# print('\nresidual_list:', residual_list)
-# print('\nedit_cost_list:', edit_cost_list)
-# print('\ndistance matrix in kernel space:', dis_k_mat)
-# print('\nged matrix:', ged_mat)
-# print('\ntotal time:', total_time)
-# print('\nnb_cost_mat:', nb_cost_mat_list[-1])
- np.savez('results/median_paper/fit_distance.clcpc.python_init40.monot.elabeled.uhpkernel.y'
- + y + '.repeat' + str(repeat) + '.k10..gm',
- edit_costs=edit_costs,
- residual_list=residual_list, edit_cost_list=edit_cost_list,
- dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
- total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)
-
- for ec in edit_costs:
- edit_costs_output_file.write(str(ec) + ' ')
- edit_costs_output_file.write('\n')
- edit_costs_output_file.close()
-
-
-# # normalized distance matrices.
-# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
-# edit_costs = gmfile['edit_costs']
-# residual_list = gmfile['residual_list']
-# edit_cost_list = gmfile['edit_cost_list']
-# dis_k_mat = gmfile['dis_k_mat']
-# ged_mat = gmfile['ged_mat']
-# total_time = gmfile['total_time']
-# nb_cost_mat_list = gmfile['nb_cost_mat_list']
-
- nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
- print(nb_consistent, nb_inconsistent, ratio_consistent)
-
-# norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
-# plt.imshow(norm_dis_k_mat)
-# plt.colorbar()
-# plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-# plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.png', format='png')
-# # plt.show()
-# plt.clf()
-#
-# norm_ged_mat = normalize_distance_matrix(ged_mat)
-# plt.imshow(norm_ged_mat)
-# plt.colorbar()
-# plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-# plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.png', format='png')
-# # plt.show()
-# plt.clf()
-#
-# norm_diff = norm_ged_mat - norm_dis_k_mat
-# plt.imshow(norm_diff)
-# plt.colorbar()
-# plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_best.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-# plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_best.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.png', format='png')
-# # plt.show()
-# plt.clf()
-# # draw_count_bar(norm_diff)
-
-
-def median_paper_clcpc_python_bash_cpp():
-    """Fit edit costs under the constraints c_vs <= c_vi + c_vr and
-    c_es <= c_ei + c_er, with GED computed by Python invoking the C++ code
-    through a bash command (with the updated library).
-    """
-# ds = {'name': 'monoterpenoides',
-# 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
-# _, y_all = loadDataset(ds['dataset'])
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- itr_max = 20
- algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
- params_ged = {'lib': 'gedlib-bash', 'cost': 'CONSTANT', 'method': 'IPFP',
- 'algo_options': algo_options}
-
- y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
- repeats = 50
- collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
- graph_dir = collection_path + 'gxl/'
-
- fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt'
-
- for y in y_all:
- for repeat in range(repeats):
- edit_costs_output_file = open(fn_edit_costs_output, 'a')
- collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
- Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
- edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
- nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
- gkernel, itr_max, params_ged=params_ged,
- parallel=False)
- total_time = np.sum(time_list)
-# print('\nedit_costs:', edit_costs)
-# print('\nresidual_list:', residual_list)
-# print('\nedit_cost_list:', edit_cost_list)
-# print('\ndistance matrix in kernel space:', dis_k_mat)
-# print('\nged matrix:', ged_mat)
-# print('\ntotal time:', total_time)
-# print('\nnb_cost_mat:', nb_cost_mat_list[-1])
- np.savez('results/median_paper/fit_distance.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
- + y + '.repeat' + str(repeat) + '.gm',
- edit_costs=edit_costs,
- residual_list=residual_list, edit_cost_list=edit_cost_list,
- dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
- total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
- coef_dk=coef_dk)
-
- for ec in edit_costs:
- edit_costs_output_file.write(str(ec) + ' ')
- edit_costs_output_file.write('\n')
- edit_costs_output_file.close()
-
-
-# # normalized distance matrices.
-# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
-# edit_costs = gmfile['edit_costs']
-# residual_list = gmfile['residual_list']
-# edit_cost_list = gmfile['edit_cost_list']
-# dis_k_mat = gmfile['dis_k_mat']
-# ged_mat = gmfile['ged_mat']
-# total_time = gmfile['total_time']
-# nb_cost_mat_list = gmfile['nb_cost_mat_list']
-# coef_dk = gmfile['coef_dk']
-
- nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
- print(nb_consistent, nb_inconsistent, ratio_consistent)
-
-# norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
-# plt.imshow(norm_dis_k_mat)
-# plt.colorbar()
-# plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-# plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.png', format='png')
-# # plt.show()
-# plt.clf()
-#
-# norm_ged_mat = normalize_distance_matrix(ged_mat)
-# plt.imshow(norm_ged_mat)
-# plt.colorbar()
-# plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-# plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.png', format='png')
-# # plt.show()
-# plt.clf()
-#
-# norm_diff = norm_ged_mat - norm_dis_k_mat
-# plt.imshow(norm_diff)
-# plt.colorbar()
-# plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-# plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
-# + y + '.repeat' + str(repeat) + '.png', format='png')
-# # plt.show()
-# plt.clf()
-# # draw_count_bar(norm_diff)
-
-
-
-
-
-def test_cs_leq_ci_plus_cr_python_bash_cpp():
-    """Fit edit costs under the constraints c_vs <= c_vi + c_vr and
-    c_es <= c_ei + c_er, with GED computed by Python invoking the C++ code
-    through a bash command (with the updated library).
-    """
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:10]
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- itr_max = 10
- algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
- params_ged = {'lib': 'gedlib-bash', 'cost': 'CONSTANT', 'method': 'IPFP',
- 'algo_options': algo_options}
- edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
- nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
- gkernel, itr_max, params_ged=params_ged,
- parallel=False)
- total_time = np.sum(time_list)
- print('\nedit_costs:', edit_costs)
- print('\nresidual_list:', residual_list)
- print('\nedit_cost_list:', edit_cost_list)
- print('\ndistance matrix in kernel space:', dis_k_mat)
- print('\nged matrix:', ged_mat)
- print('\ntotal time:', total_time)
- print('\nnb_cost_mat:', nb_cost_mat_list[-1])
- np.savez('results/fit_distance.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel.gm',
- edit_costs=edit_costs,
- residual_list=residual_list, edit_cost_list=edit_cost_list,
- dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
- total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
- coef_dk=coef_dk)
-
-# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-# 'extra_params': {}} # node/edge symb
-# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-## Gn = Gn[0:10]
-## remove_edges(Gn)
-# gkernel = 'untilhpathkernel'
-# node_label = 'atom'
-# edge_label = 'bond_type'
-# itr_max = 10
-# edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-# nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
-# gkernel, itr_max)
-# total_time = np.sum(time_list)
-# print('\nedit_costs:', edit_costs)
-# print('\nresidual_list:', residual_list)
-# print('\nedit_cost_list:', edit_cost_list)
-# print('\ndistance matrix in kernel space:', dis_k_mat)
-# print('\nged matrix:', ged_mat)
-# print('\ntotal time:', total_time)
-# print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-# np.savez('results/fit_distance.cs_leq_ci_plus_cr.mutag.elabeled.uhpkernel.gm',
-# edit_costs=edit_costs,
-# residual_list=residual_list, edit_cost_list=edit_cost_list,
-# dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
-# total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, coef_dk)
-
-
-# # normalized distance matrices.
-# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.monot.elabeled.uhpkernel.gm.npz')
-# edit_costs = gmfile['edit_costs']
-# residual_list = gmfile['residual_list']
-# edit_cost_list = gmfile['edit_cost_list']
-# dis_k_mat = gmfile['dis_k_mat']
-# ged_mat = gmfile['ged_mat']
-# total_time = gmfile['total_time']
-# nb_cost_mat_list = gmfile['nb_cost_mat_list']
-# coef_dk = gmfile['coef_dk']
-
- nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
- print(nb_consistent, nb_inconsistent, ratio_consistent)
-
-# dis_k_sub = pairwise_substitution(dis_k_mat)
-# ged_sub = pairwise_substitution(ged_mat)
-# np.savez('results/sub_dis_mat.cs_leq_ci_plus_cr.gm',
-# dis_k_sub=dis_k_sub, ged_sub=ged_sub)
-
-
- norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
- plt.imshow(norm_dis_k_mat)
- plt.colorbar()
- plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
- + '.eps', format='eps', dpi=300)
- plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
- + '.png', format='png')
-# plt.show()
- plt.clf()
-
- norm_ged_mat = normalize_distance_matrix(ged_mat)
- plt.imshow(norm_ged_mat)
- plt.colorbar()
- plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
- + '.eps', format='eps', dpi=300)
- plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
- + '.png', format='png')
-# plt.show()
- plt.clf()
-
- norm_diff = norm_ged_mat - norm_dis_k_mat
- plt.imshow(norm_diff)
- plt.colorbar()
- plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
- + '.eps', format='eps', dpi=300)
- plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
- + '.png', format='png')
-# plt.show()
- plt.clf()
-# draw_count_bar(norm_diff)
-
-
-def test_anycosts():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:10]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
- itr_max = 10
- edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
- nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
- total_time = np.sum(time_list)
- print('\nedit_costs:', edit_costs)
- print('\nresidual_list:', residual_list)
- print('\nedit_cost_list:', edit_cost_list)
- print('\ndistance matrix in kernel space:', dis_k_mat)
- print('\nged matrix:', ged_mat)
- print('\ntotal time:', total_time)
- print('\nnb_cost_mat:', nb_cost_mat_list[-1])
- np.savez('results/fit_distance.any_costs.gm', edit_costs=edit_costs,
- residual_list=residual_list, edit_cost_list=edit_cost_list,
- dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
- total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)
-
-# # normalized distance matrices.
-# gmfile = np.load('results/fit_distance.any_costs.gm.npz')
-# edit_costs = gmfile['edit_costs']
-# residual_list = gmfile['residual_list']
-# edit_cost_list = gmfile['edit_cost_list']
-# dis_k_mat = gmfile['dis_k_mat']
-# ged_mat = gmfile['ged_mat']
-# total_time = gmfile['total_time']
-## nb_cost_mat_list = gmfile['nb_cost_mat_list']
-
- norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
- plt.imshow(norm_dis_k_mat)
- plt.colorbar()
- plt.savefig('results/norm_dis_k_mat.any_costs' + '.eps', format='eps', dpi=300)
-# plt.savefig('results/norm_dis_k_mat.any_costs' + '.png', format='png')
-# plt.show()
- plt.clf()
-
- norm_ged_mat = normalize_distance_matrix(ged_mat)
- plt.imshow(norm_ged_mat)
- plt.colorbar()
- plt.savefig('results/norm_ged_mat.any_costs' + '.eps', format='eps', dpi=300)
-# plt.savefig('results/norm_ged_mat.any_costs' + '.png', format='png')
-# plt.show()
- plt.clf()
-
- norm_diff = norm_ged_mat - norm_dis_k_mat
- plt.imshow(norm_diff)
- plt.colorbar()
- plt.savefig('results/diff_mat_norm_ged_dis_k.any_costs' + '.eps', format='eps', dpi=300)
-# plt.savefig('results/diff_mat_norm_ged_dis_k.any_costs' + '.png', format='png')
-# plt.show()
- plt.clf()
-# draw_count_bar(norm_diff)
-
-
-def test_cs_leq_ci_plus_cr():
- """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er
- """
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:10]
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- itr_max = 10
- edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
- nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
- gkernel, itr_max,
- fitkernel='gaussian')
- total_time = np.sum(time_list)
- print('\nedit_costs:', edit_costs)
- print('\nresidual_list:', residual_list)
- print('\nedit_cost_list:', edit_cost_list)
- print('\ndistance matrix in kernel space:', dis_k_mat)
- print('\nged matrix:', ged_mat)
- print('\ntotal time:', total_time)
- print('\nnb_cost_mat:', nb_cost_mat_list[-1])
- np.savez('results/fit_distance.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel.gm',
- edit_costs=edit_costs,
- residual_list=residual_list, edit_cost_list=edit_cost_list,
- dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
- total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
- coef_dk=coef_dk)
-
-# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-# 'extra_params': {}} # node/edge symb
-# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-## Gn = Gn[0:10]
-## remove_edges(Gn)
-# gkernel = 'untilhpathkernel'
-# node_label = 'atom'
-# edge_label = 'bond_type'
-# itr_max = 10
-# edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-# nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
-# gkernel, itr_max)
-# total_time = np.sum(time_list)
-# print('\nedit_costs:', edit_costs)
-# print('\nresidual_list:', residual_list)
-# print('\nedit_cost_list:', edit_cost_list)
-# print('\ndistance matrix in kernel space:', dis_k_mat)
-# print('\nged matrix:', ged_mat)
-# print('\ntotal time:', total_time)
-# print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-# np.savez('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.mutag.elabeled.uhpkernel.gm',
-# edit_costs=edit_costs,
-# residual_list=residual_list, edit_cost_list=edit_cost_list,
-# dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
-# total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, coef_dk)
-
-
-# # normalized distance matrices.
-# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
-# edit_costs = gmfile['edit_costs']
-# residual_list = gmfile['residual_list']
-# edit_cost_list = gmfile['edit_cost_list']
-# dis_k_mat = gmfile['dis_k_mat']
-# ged_mat = gmfile['ged_mat']
-# total_time = gmfile['total_time']
-# nb_cost_mat_list = gmfile['nb_cost_mat_list']
-# coef_dk = gmfile['coef_dk']
-
- nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
- print(nb_consistent, nb_inconsistent, ratio_consistent)
-
-# dis_k_sub = pairwise_substitution(dis_k_mat)
-# ged_sub = pairwise_substitution(ged_mat)
-# np.savez('results/sub_dis_mat.cs_leq_ci_plus_cr.cost_leq_1en2.gm',
-# dis_k_sub=dis_k_sub, ged_sub=ged_sub)
-
-
- norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
- plt.imshow(norm_dis_k_mat)
- plt.colorbar()
- plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
- + '.eps', format='eps', dpi=300)
- plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
- + '.png', format='png')
-# plt.show()
- plt.clf()
-
- norm_ged_mat = normalize_distance_matrix(ged_mat)
- plt.imshow(norm_ged_mat)
- plt.colorbar()
- plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
- + '.eps', format='eps', dpi=300)
- plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
- + '.png', format='png')
-# plt.show()
- plt.clf()
-
- norm_diff = norm_ged_mat - norm_dis_k_mat
- plt.imshow(norm_diff)
- plt.colorbar()
- plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
- + '.eps', format='eps', dpi=300)
- plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
- + '.png', format='png')
-# plt.show()
- plt.clf()
-# draw_count_bar(norm_diff)
-
-
-def test_unfitted():
-    """Baseline with unfitted edit costs.
-    """
-    from gklearn.preimage.fitDistance import compute_geds
-    from gklearn.preimage.utils import kernel_distance_matrix
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:10]
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
-
-
-# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-# 'extra_params': {}} # node/edge symb
-# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-## Gn = Gn[0:10]
-## remove_edges(Gn)
-# gkernel = 'marginalizedkernel'
-
- dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, gkernel=gkernel)
- ged_all, ged_mat, n_edit_operations = compute_geds(Gn, [3, 3, 1, 3, 3, 1],
- [0, 1, 2, 3, 4, 5], parallel=True)
- print('\ndistance matrix in kernel space:', dis_k_mat)
- print('\nged matrix:', ged_mat)
-# np.savez('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.gm', edit_costs=edit_costs,
-# residual_list=residual_list, edit_cost_list=edit_cost_list,
-# dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
-# total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)
-
- # normalized distance matrices.
-# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en3.gm.npz')
-# edit_costs = gmfile['edit_costs']
-# residual_list = gmfile['residual_list']
-# edit_cost_list = gmfile['edit_cost_list']
-# dis_k_mat = gmfile['dis_k_mat']
-# ged_mat = gmfile['ged_mat']
-# total_time = gmfile['total_time']
-# nb_cost_mat_list = gmfile['nb_cost_mat_list']
-
- nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
- print(nb_consistent, nb_inconsistent, ratio_consistent)
-
- norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
- plt.imshow(norm_dis_k_mat)
- plt.colorbar()
- plt.savefig('results/norm_dis_k_mat.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
- plt.savefig('results/norm_dis_k_mat.unfitted.MUTAG' + '.png', format='png')
-# plt.show()
- plt.clf()
-
- norm_ged_mat = normalize_distance_matrix(ged_mat)
- plt.imshow(norm_ged_mat)
- plt.colorbar()
- plt.savefig('results/norm_ged_mat.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
- plt.savefig('results/norm_ged_mat.unfitted.MUTAG' + '.png', format='png')
-# plt.show()
- plt.clf()
-
- norm_diff = norm_ged_mat - norm_dis_k_mat
- plt.imshow(norm_diff)
- plt.colorbar()
- plt.savefig('results/diff_mat_norm_ged_dis_k.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
- plt.savefig('results/diff_mat_norm_ged_dis_k.unfitted.MUTAG' + '.png', format='png')
-# plt.show()
- plt.clf()
- draw_count_bar(norm_diff)
-
-
-def pairwise_substitution_consistence(mat1, mat2):
-    """Count, over every pair of upper-triangle entries, whether the sign of
-    their difference is the same in both distance matrices, and return the
-    consistent count, the inconsistent count, and the consistency ratio.
-    """
-    nb_consistent = 0
-    nb_inconsistent = 0
-    # both matrices are considered symmetric, so the upper triangles suffice;
-    # use triu for both so the entries of the two vectors align pairwise.
-    upper_tri1 = mat1[np.triu_indices_from(mat1)]
-    upper_tri2 = mat2[np.triu_indices_from(mat2)]
-    for i in tqdm(range(len(upper_tri1)), desc='computing consistency', file=sys.stdout):
- for j in range(i, len(upper_tri1)):
- if np.sign(upper_tri1[i] - upper_tri1[j]) == np.sign(upper_tri2[i] - upper_tri2[j]):
- nb_consistent += 1
- else:
- nb_inconsistent += 1
- return nb_consistent, nb_inconsistent, nb_consistent / (nb_consistent + nb_inconsistent)
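-
-# Sanity check (hypothetical): calling pairwise_substitution_consistence(m, m)
-# on any symmetric matrix makes every sign comparison agree, so the returned
-# ratio is 1.0.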
-
-
-def pairwise_substitution(mat):
- # the matrix is considered symmetric.
- upper_tri = mat[np.triu_indices_from(mat)]
- sub_list = []
- for i in tqdm(range(len(upper_tri)), desc='computing', file=sys.stdout):
- for j in range(i, len(upper_tri)):
- sub_list.append(upper_tri[i] - upper_tri[j])
- return sub_list
-
-
-def draw_count_bar(norm_diff):
- import pandas
- from collections import Counter, OrderedDict
- norm_diff_cnt = norm_diff.flatten()
- norm_diff_cnt = norm_diff_cnt * 10
- norm_diff_cnt = np.floor(norm_diff_cnt)
- norm_diff_cnt = Counter(norm_diff_cnt)
- norm_diff_cnt = OrderedDict(sorted(norm_diff_cnt.items()))
-    df = pandas.DataFrame.from_dict(norm_diff_cnt, orient='index')
-    df.plot(kind='bar')
-    plt.show() # needed to render the bar plot when run as a script.
-
-
-if __name__ == '__main__':
-# test_anycosts()
-# test_cs_leq_ci_plus_cr()
-# test_unfitted()
-
-# test_cs_leq_ci_plus_cr_python_bash_cpp()
-# median_paper_clcpc_python_bash_cpp()
-# median_paper_clcpc_python_best()
-
-# x = np.array([[1,2,3],[4,5,6],[7,8,9]])
-# xx = pairwise_substitution(x)
-
- test_update_costs()
\ No newline at end of file
diff --git a/gklearn/preimage/test_ged.py b/gklearn/preimage/test_ged.py
deleted file mode 100644
index 74e18a0..0000000
--- a/gklearn/preimage/test_ged.py
+++ /dev/null
@@ -1,520 +0,0 @@
-#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/gedlibpy/lib/fann/:/export/home/lambertn/Documents/gedlibpy/lib/libsvm.3.22:/export/home/lambertn/Documents/gedlibpy/lib/nomad
-
-#So that "import script" finds the libraries GedLib needs
-#Equivalent to setting the LD_LIBRARY_PATH environment variable in a bash shell
-#import gedlibpy_linlin.librariesImport
-#from gedlibpy_linlin import gedlibpy
-from libs import *
-import networkx as nx
-import numpy as np
-from tqdm import tqdm
-import sys
-import os
-
-
-def test_NON_SYMBOLIC_cost():
-    """Test edit cost NON_SYMBOLIC.
-    """
-    from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic
- from gklearn.preimage.test_k_closest_graphs import reform_attributes
- from gklearn.utils.graphfiles import loadDataset
-
- dataset = '../../datasets/Letter-high/Letter-high_A.txt'
- Gn, y_all = loadDataset(dataset)
-
- g1 = Gn[200]
- g2 = Gn[1780]
- reform_attributes(g1)
- reform_attributes(g2)
-
- c_vi = 0.675
- c_vr = 0.675
- c_vs = 0.75
- c_ei = 0.425
- c_er = 0.425
- c_es = 0
-
- edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
- dis, pi_forward, pi_backward = GED(g1, g2, lib='gedlibpy',
- cost='NON_SYMBOLIC', method='IPFP', edit_cost_constant=edit_cost_constant,
- algo_options='', stabilizer=None)
- n_vi, n_vr, sod_vs, n_ei, n_er, sod_es = get_nb_edit_operations_nonsymbolic(g1, g2,
- pi_forward, pi_backward)
-
- print('# of operations:', n_vi, n_vr, sod_vs, n_ei, n_er, sod_es)
-    print('c_vi, c_vr, c_vs, c_ei, c_er, c_es:', c_vi, c_vr, c_vs, c_ei, c_er, c_es)
- cost_computed = c_vi * n_vi + c_vr * n_vr + c_vs * sod_vs \
- + c_ei * n_ei + c_er * n_er + c_es * sod_es
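-    # If the binding and the operation counting are consistent, the two values
-    # printed below should match up to the solver's tolerance.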
- print('dis (cost computed by GED):', dis)
- print('cost computed by # of operations and edit cost constants:', cost_computed)
-
-
-def test_LETTER2_cost():
- """Test edit cost LETTER2.
- """
- from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
- from gklearn.preimage.test_k_closest_graphs import reform_attributes
- from gklearn.utils.graphfiles import loadDataset
-
- ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
- 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-
- g1 = Gn[200]
- g2 = Gn[1780]
- reform_attributes(g1)
- reform_attributes(g2)
-
- c_vi = 0.675
- c_vr = 0.675
- c_vs = 0.75
- c_ei = 0.425
- c_er = 0.425
-
- edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er]
- dis, pi_forward, pi_backward = GED(g1, g2, dataset='letter', lib='gedlibpy',
- cost='LETTER2', method='IPFP', edit_cost_constant=edit_cost_constant,
- algo_options='', stabilizer=None)
- n_vi, n_vr, n_vs, sod_vs, n_ei, n_er = get_nb_edit_operations_letter(g1, g2,
- pi_forward, pi_backward)
-
- print('# of operations:', n_vi, n_vr, n_vs, sod_vs, n_ei, n_er)
- print('c_vi, c_vr, c_vs, c_ei, c_er:', c_vi, c_vr, c_vs, c_ei, c_er)
- cost_computed = c_vi * n_vi + c_vr * n_vr + c_vs * sod_vs \
- + c_ei * n_ei + c_er * n_er
- print('dis (cost computed by GED):', dis)
- print('cost computed by # of operations and edit cost constants:', cost_computed)
-
-
-
-def test_get_nb_edit_operations_letter():
- """Test whether function preimage.ged.get_nb_edit_operations_letter returns
- correct numbers of edit operations. The distance/cost computed by GED
- should be the same as the cost computed by number of operations and edit
- cost constants.
- """
- from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
- from gklearn.preimage.test_k_closest_graphs import reform_attributes
- from gklearn.utils.graphfiles import loadDataset
-
- ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
- 'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-
- g1 = Gn[200]
- g2 = Gn[1780]
- reform_attributes(g1)
- reform_attributes(g2)
-
- c_vir = 0.9
- c_eir = 1.7
- alpha = 0.75
-
- edit_cost_constant = [c_vir, c_eir, alpha]
- dis, pi_forward, pi_backward = GED(g1, g2, dataset='letter', lib='gedlibpy',
- cost='LETTER', method='IPFP', edit_cost_constant=edit_cost_constant,
- algo_options='', stabilizer=None)
- n_vi, n_vr, n_vs, c_vs, n_ei, n_er = get_nb_edit_operations_letter(g1, g2,
- pi_forward, pi_backward)
-
- print('# of operations and costs:', n_vi, n_vr, n_vs, c_vs, n_ei, n_er)
- print('c_vir, c_eir, alpha:', c_vir, c_eir, alpha)
- cost_computed = alpha * c_vir * (n_vi + n_vr) \
- + alpha * c_vs \
- + (1 - alpha) * c_eir * (n_ei + n_er)
- print('dis (cost computed by GED):', dis)
- print('cost computed by # of operations and edit cost constants:', cost_computed)
-
-
-def test_get_nb_edit_operations():
- """Test whether function preimage.ged.get_nb_edit_operations returns correct
- numbers of edit operations. The distance/cost computed by GED should be the
- same as the cost computed by number of operations and edit cost constants.
- """
- from gklearn.preimage.ged import GED, get_nb_edit_operations
- from gklearn.utils.graphfiles import loadDataset
- import os
-
- ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-
- g1 = Gn[20]
- g2 = Gn[108]
-
- c_vi = 3
- c_vr = 3
- c_vs = 1
- c_ei = 3
- c_er = 3
- c_es = 1
-
- edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
- dis, pi_forward, pi_backward = GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy',
- cost='CONSTANT', method='IPFP', edit_cost_constant=edit_cost_constant,
- algo_options='', stabilizer=None)
- n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(g1, g2,
- pi_forward, pi_backward)
-
- print('# of operations and costs:', n_vi, n_vr, n_vs, n_ei, n_er, n_es)
- print('edit costs:', c_vi, c_vr, c_vs, c_ei, c_er, c_es)
- cost_computed = n_vi * c_vi + n_vr * c_vr + n_vs * c_vs \
- + n_ei * c_ei + n_er * c_er + n_es * c_es
- print('dis (cost computed by GED):', dis)
- print('cost computed by # of operations and edit cost constants:', cost_computed)
-
-
-    """Test GED computation with Python invoking the C++ code through a bash
-    command (with the updated library).
-    """
- """
- from gklearn.utils.graphfiles import loadDataset
- from gklearn.preimage.ged import GED
-
- data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
-# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
- collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
-	graph_dir = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/'
-
- Gn, y = loadDataset(collection_file, extra_params=graph_dir)
-
- algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-
- for repeat in range(0, 3):
- # Generate the result file.
- ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_bash_' + str(repeat) + '_init40.3_20.txt'
-# runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_min_' + str(repeat) + '.txt'
-
- ged_file = open(ged_filename, 'a')
-# runtime_file = open(runtime_filename, 'a')
-
- ged_mat = np.empty((len(Gn), len(Gn)))
-# runtime_mat = np.empty((len(Gn), len(Gn)))
-
- for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
- for j in range(len(Gn)):
- print(i, j)
- g1 = Gn[i]
- g2 = Gn[j]
- upper_bound, _, _ = GED(g1, g2, lib='gedlib-bash', cost='CONSTANT',
- method='IPFP',
- edit_cost_constant=[3.0, 3.0, 1.0, 3.0, 3.0, 1.0],
- algo_options=algo_options)
-# runtime = gedlibpy.get_runtime(g1, g2)
- ged_mat[i][j] = upper_bound
-# runtime_mat[i][j] = runtime
-
- # Write to files.
- ged_file.write(str(int(upper_bound)) + ' ')
-# runtime_file.write(str(runtime) + ' ')
-
- ged_file.write('\n')
-# runtime_file.write('\n')
-
- ged_file.close()
-# runtime_file.close()
-
- print('ged_mat')
- print(ged_mat)
-# print('runtime_mat:')
-# print(runtime_mat)
-
- return
-
-
-
-def test_ged_best_settings_updated():
- """Test ged computation with best settings the same as in the C++ code (with updated library).
- """
-
- data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
- collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
-
-	graph_dir = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/'
-
- algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-
- for repeat in range(0, 3):
- # Generate the result file.
- ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_updated_' + str(repeat) + '_init40.txt'
- runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_updated_' + str(repeat) + '_init40.txt'
-
- gedlibpy.restart_env()
- gedlibpy.load_GXL_graphs(graph_dir, collection_file)
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
- gedlibpy.init()
- gedlibpy.set_method("IPFP", algo_options)
- gedlibpy.init_method()
-
- ged_mat = np.empty((len(listID), len(listID)))
- runtime_mat = np.empty((len(listID), len(listID)))
-
- for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
- ged_file = open(ged_filename, 'a')
- runtime_file = open(runtime_filename, 'a')
-
- for j in range(len(listID)):
- g1 = listID[i]
- g2 = listID[j]
- gedlibpy.run_method(g1, g2)
- upper_bound = gedlibpy.get_upper_bound(g1, g2)
- runtime = gedlibpy.get_runtime(g1, g2)
- ged_mat[i][j] = upper_bound
- runtime_mat[i][j] = runtime
-
- # Write to files.
- ged_file.write(str(int(upper_bound)) + ' ')
- runtime_file.write(str(runtime) + ' ')
-
- ged_file.write('\n')
- runtime_file.write('\n')
-
- ged_file.close()
- runtime_file.close()
-
- print('ged_mat')
- print(ged_mat)
- print('runtime_mat:')
- print(runtime_mat)
-
- return
-
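# Editor's sketch: the gedlibpy call sequence the tests above repeat, reduced
# to its minimal form. It assumes the module-level gedlibpy import used
# throughout this file; `ged_between_first_two` and its arguments are
# hypothetical names. Note the ordering constraints: init() must follow
# set_edit_cost(), and init_method() must follow set_method().
def ged_between_first_two(graph_dir, collection_file):
    gedlibpy.restart_env()                                # fresh C++ environment
    gedlibpy.load_GXL_graphs(graph_dir, collection_file)  # register the dataset
    ids = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
    gedlibpy.init()
    gedlibpy.set_method('IPFP', '')                       # default IPFP options
    gedlibpy.init_method()
    gedlibpy.run_method(ids[0], ids[1])
    return gedlibpy.get_upper_bound(ids[0], ids[1])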
-
-def test_ged_best_settings():
- """Test ged computation with best settings the same as in the C++ code.
- """
-
- data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
- collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-	graph_dir = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/'
-
- algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
-
- for repeat in range(0, 3):
- # Generate the result file.
- ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_best_settings_' + str(repeat) + '.txt'
- runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_best_settings_' + str(repeat) + '.txt'
-
- ged_file = open(ged_filename, 'a')
- runtime_file = open(runtime_filename, 'a')
-
- gedlibpy.restart_env()
- gedlibpy.load_GXL_graphs(graph_dir, collection_file)
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
- gedlibpy.init()
- gedlibpy.set_method("IPFP", algo_options)
- gedlibpy.init_method()
-
- ged_mat = np.empty((len(listID), len(listID)))
- runtime_mat = np.empty((len(listID), len(listID)))
-
- for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
- for j in range(len(listID)):
- g1 = listID[i]
- g2 = listID[j]
- gedlibpy.run_method(g1, g2)
- upper_bound = gedlibpy.get_upper_bound(g1, g2)
- runtime = gedlibpy.get_runtime(g1, g2)
- ged_mat[i][j] = upper_bound
- runtime_mat[i][j] = runtime
-
- # Write to files.
- ged_file.write(str(int(upper_bound)) + ' ')
- runtime_file.write(str(runtime) + ' ')
-
- ged_file.write('\n')
- runtime_file.write('\n')
-
- ged_file.close()
- runtime_file.close()
-
- print('ged_mat')
- print(ged_mat)
- print('runtime_mat:')
- print(runtime_mat)
-
- return
-
-
-
-def test_ged_default():
- """Test ged computation with default settings.
- """
-
- data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
- collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-	graph_dir = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/'
-
- for repeat in range(3):
- # Generate the result file.
- ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_default_' + str(repeat) + '.txt'
- runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_default_' + str(repeat) + '.txt'
-
- ged_file = open(ged_filename, 'a')
- runtime_file = open(runtime_filename, 'a')
-
- gedlibpy.restart_env()
- gedlibpy.load_GXL_graphs(graph_dir, collection_file)
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
- gedlibpy.init()
- gedlibpy.set_method("IPFP", "")
- gedlibpy.init_method()
-
- ged_mat = np.empty((len(listID), len(listID)))
- runtime_mat = np.empty((len(listID), len(listID)))
-
- for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
- for j in range(len(listID)):
- g1 = listID[i]
- g2 = listID[j]
- gedlibpy.run_method(g1, g2)
- upper_bound = gedlibpy.get_upper_bound(g1, g2)
- runtime = gedlibpy.get_runtime(g1, g2)
- ged_mat[i][j] = upper_bound
- runtime_mat[i][j] = runtime
-
- # Write to files.
- ged_file.write(str(int(upper_bound)) + ' ')
- runtime_file.write(str(runtime) + ' ')
-
- ged_file.write('\n')
- runtime_file.write('\n')
-
- ged_file.close()
- runtime_file.close()
-
- print('ged_mat')
- print(ged_mat)
- print('runtime_mat:')
- print(runtime_mat)
-
- return
-
-
-def test_ged_min():
- """Test ged computation with the "min" stabilizer.
- """
- from gklearn.utils.graphfiles import loadDataset
- from gklearn.preimage.ged import GED
-
- data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
- collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-	graph_dir = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/'
-
- Gn, y = loadDataset(collection_file, extra_params=graph_dir)
-
-# algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
-
- for repeat in range(0, 3):
- # Generate the result file.
- ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_min_' + str(repeat) + '.txt'
-# runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_min_' + str(repeat) + '.txt'
-
- ged_file = open(ged_filename, 'a')
-# runtime_file = open(runtime_filename, 'a')
-
- ged_mat = np.empty((len(Gn), len(Gn)))
-# runtime_mat = np.empty((len(Gn), len(Gn)))
-
- for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
- for j in range(len(Gn)):
- g1 = Gn[i]
- g2 = Gn[j]
- upper_bound, _, _ = GED(g1, g2, lib='gedlibpy', cost='CONSTANT',
- method='IPFP',
- edit_cost_constant=[3.0, 3.0, 1.0, 3.0, 3.0, 1.0],
- stabilizer='min', repeat=10)
-# runtime = gedlibpy.get_runtime(g1, g2)
- ged_mat[i][j] = upper_bound
-# runtime_mat[i][j] = runtime
-
- # Write to files.
- ged_file.write(str(int(upper_bound)) + ' ')
-# runtime_file.write(str(runtime) + ' ')
-
- ged_file.write('\n')
-# runtime_file.write('\n')
-
- ged_file.close()
-# runtime_file.close()
-
- print('ged_mat')
- print(ged_mat)
-# print('runtime_mat:')
-# print(runtime_mat)
-
- return
-
-
-def init():
-	print("List of Edit Cost Options:")
-	for i in gedlibpy.list_of_edit_cost_options:
-		print(i)
-	print("")
-
-	print("List of Method Options:")
-	for j in gedlibpy.list_of_method_options:
-		print(j)
-	print("")
-
-	print("List of Init Options:")
-	for k in gedlibpy.list_of_init_options:
-		print(k)
-	print("")
-
-
-
-
-def convertGraph(G):
- G_new = nx.Graph()
- for nd, attrs in G.nodes(data=True):
- G_new.add_node(str(nd), chem=attrs['atom'])
- for nd1, nd2, attrs in G.edges(data=True):
- G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-
- return G_new
-
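# Editor's note: convertGraph() renames the dataset's 'atom'/'bond_type'
# attributes to the 'chem'/'valence' names expected by the CHEM_1 edit cost,
# and stringifies node ids for gedlibpy. A minimal sketch of the mapping
# (illustrative graph, networkx 2.x attribute access):
g = nx.Graph()
g.add_node(0, atom='C')
g.add_node(1, atom='O')
g.add_edge(0, 1, bond_type='1')
g_new = convertGraph(g)
assert g_new.nodes['0']['chem'] == 'C'
assert g_new.edges['0', '1']['valence'] == '1'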
-
-def testNxGrapĥ():
- from gklearn.utils.graphfiles import loadDataset
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-
- gedlibpy.restart_env()
- for graph in Gn:
- g_new = convertGraph(graph)
- gedlibpy.add_nx_graph(g_new, "")
-
- listID = gedlibpy.get_all_graph_ids()
- gedlibpy.set_edit_cost("CHEM_1")
- gedlibpy.init()
- gedlibpy.set_method("IPFP", "")
- gedlibpy.init_method()
-
- print(listID)
- g = listID[0]
- h = listID[1]
-
- gedlibpy.run_method(g, h)
-
- print("Node Map : ", gedlibpy.get_node_map(g, h))
- print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
- print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))
-
-if __name__ == '__main__':
-# test_ged_default()
-# test_ged_min()
-# test_ged_best_settings()
-# test_ged_best_settings_updated()
-# test_ged_python_bash_cpp()
-# test_get_nb_edit_operations()
-# test_get_nb_edit_operations_letter()
-# test_LETTER2_cost()
- test_NON_SYMBOLIC_cost()
-
-
- #init()
- #testNxGrapĥ()
diff --git a/gklearn/preimage/test_iam.py b/gklearn/preimage/test_iam.py
deleted file mode 100644
index 5897f50..0000000
--- a/gklearn/preimage/test_iam.py
+++ /dev/null
@@ -1,964 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Sep 5 15:59:00 2019
-
-@author: ljia
-"""
-
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-import os
-#from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-#from gklearn.utils.logger2file import *
-from gklearn.preimage.iam import iam_upgraded
-from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
-#from gklearn.preimage.ged import ged_median
-
-
-def test_iam_monoterpenoides_with_init40():
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- # unfitted edit costs.
- c_vi = 3
- c_vr = 3
- c_vs = 1
- c_ei = 3
- c_er = 3
- c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.0001
- removeNodes = False
- connected_iam = False
- # parameters for IAM function
-	ged_cost = 'CONSTANT'
- ged_method = 'IPFP'
- edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
- ged_stabilizer = None
-# ged_repeat = 50
- algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
- params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
- 'edit_cost_constant': edit_cost_constant,
- 'algo_options': algo_options,
- 'stabilizer': ged_stabilizer}
-
-
- collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
- graph_dir = collection_path + 'gxl/'
- y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
- repeats = 50
-
- # classify graphs according to classes.
- time_list = []
- dis_ks_min_list = []
- dis_ks_set_median_list = []
- sod_gs_list = []
- g_best = []
- sod_set_median_list = []
- sod_list_list = []
- for y in y_all:
- print('\n-------------------------------------------------------')
- print('class of y:', y)
-
- time_list.append([])
- dis_ks_min_list.append([])
- dis_ks_set_median_list.append([])
- sod_gs_list.append([])
- g_best.append([])
- sod_set_median_list.append([])
-
- for repeat in range(repeats):
- # load median set.
- collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
- Gn_median, _ = loadDataset(collection_file, extra_params=graph_dir)
- Gn_candidate = [g.copy() for g in Gn_median]
-
- time0 = time.time()
- G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
- = iam_upgraded(Gn_median,
- Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
- epsilon=epsilon_iam, node_label=node_label, edge_label=edge_label,
- connected=connected_iam, removeNodes=removeNodes,
- params_ged=params_ged)
- time_total = time.time() - time0
- print('\ntime: ', time_total)
- time_list[-1].append(time_total)
- g_best[-1].append(G_gen_median_list[0])
- sod_set_median_list[-1].append(sod_set_median)
- print('\nsmallest sod of the set median:', sod_set_median)
- sod_gs_list[-1].append(sod_gen_median)
- print('\nsmallest sod in graph space:', sod_gen_median)
- sod_list_list.append(sod_list)
-
-# # show the best graph and save it to file.
-# print('one of the possible corresponding pre-images is')
-# nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
-# with_labels=True)
-## plt.show()
-# # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
-## plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
-## '_repeat' + str(repeat) + '_' + str(time.time()) +
-## '.png', format="PNG")
-# plt.clf()
-# # print(G_gen_median_list[0].nodes(data=True))
-# # print(G_gen_median_list[0].edges(data=True))
-
- print('\nsods of the set median for this class:', sod_set_median_list[-1])
- print('\nsods in graph space for this class:', sod_gs_list[-1])
-# print('\ndistance in kernel space of set median for this class:',
-# dis_ks_set_median_list[-1])
-# print('\nsmallest distances in kernel space for this class:',
-# dis_ks_min_list[-1])
- print('\ntimes for this class:', time_list[-1])
-
- sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
- sod_gs_list[-1] = np.mean(sod_gs_list[-1])
-# dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
-# dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
- time_list[-1] = np.mean(time_list[-1])
-
- print()
- print('\nmean sods of the set median for each class:', sod_set_median_list)
- print('\nmean sods in graph space for each class:', sod_gs_list)
-# print('\ndistances in kernel space of set median for each class:',
-# dis_ks_set_median_list)
-# print('\nmean smallest distances in kernel space for each class:',
-# dis_ks_min_list)
- print('\nmean times for each class:', time_list)
-
- print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
- print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
-# print('\nmean distances in kernel space of set median of all:',
-# np.mean(dis_ks_set_median_list))
-# print('\nmean smallest distances in kernel space of all:',
-# np.mean(dis_ks_min_list))
- print('\nmean times of all:', np.mean(time_list))
-
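# Editor's sketch of the quantity reported above: the SOD (sum of distances)
# of a candidate median g over a median set, for any distance function d.
# `sod` is a hypothetical helper, not part of the original module.
def sod(g, graphs, d):
    """Sum of distances from g to every graph in `graphs`."""
    return sum(d(g, gi) for gi in graphs)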
-
-
-
-def test_iam_monoterpenoides():
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
-
- # parameters for GED function from the IAM paper.
- # fitted edit costs (Gaussian).
- c_vi = 0.03620133402089074
- c_vr = 0.0417574590207099
- c_vs = 0.009992282328587499
- c_ei = 0.08293120042342755
- c_er = 0.09512220476358019
- c_es = 0.09222529696841467
-# # fitted edit costs (linear combinations).
-# c_vi = 0.1749684054238749
-# c_vr = 0.0734054228711457
-# c_vs = 0.05017781726016715
-# c_ei = 0.1869431164806936
-# c_er = 0.32055856948274
-# c_es = 0.2569469379247611
-# # unfitted edit costs.
-# c_vi = 3
-# c_vr = 3
-# c_vs = 1
-# c_ei = 3
-# c_er = 3
-# c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = False
- connected_iam = False
- # parameters for IAM function
-	ged_cost = 'CONSTANT'
- ged_method = 'IPFP'
- edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-# edit_cost_constant = []
- ged_stabilizer = 'min'
- ged_repeat = 50
- params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
- 'edit_cost_constant': edit_cost_constant,
- 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-
-	# classify graphs according to classes.
- time_list = []
- dis_ks_min_list = []
- dis_ks_set_median_list = []
- sod_gs_list = []
- g_best = []
- sod_set_median_list = []
- sod_list_list = []
- idx_dict = get_same_item_indices(y_all)
- for y_class in idx_dict:
- print('\n-------------------------------------------------------')
- print('class of y:', y_class)
- Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
-
- time_list.append([])
- dis_ks_min_list.append([])
- dis_ks_set_median_list.append([])
- sod_gs_list.append([])
- g_best.append([])
- sod_set_median_list.append([])
-
- for repeat in range(50):
- idx_rdm = random.sample(range(len(Gn_class)), 10)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
- Gn_candidate = [g.copy() for g in Gn_median]
-
- alpha_range = [1 / len(Gn_median)] * len(Gn_median)
- time0 = time.time()
- G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
- = iam_upgraded(Gn_median,
- Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
- epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
- params_ged=params_ged)
- time_total = time.time() - time0
- print('\ntime: ', time_total)
- time_list[-1].append(time_total)
- g_best[-1].append(G_gen_median_list[0])
- sod_set_median_list[-1].append(sod_set_median)
- print('\nsmallest sod of the set median:', sod_set_median)
- sod_gs_list[-1].append(sod_gen_median)
- print('\nsmallest sod in graph space:', sod_gen_median)
- sod_list_list.append(sod_list)
-
- # show the best graph and save it to file.
- print('one of the possible corresponding pre-images is')
- nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
- with_labels=True)
-# plt.show()
- # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
-# plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
-# '_repeat' + str(repeat) + '_' + str(time.time()) +
-# '.png', format="PNG")
- plt.clf()
- # print(G_gen_median_list[0].nodes(data=True))
- # print(G_gen_median_list[0].edges(data=True))
-
-
- # compute distance between \psi and the set median graph.
- knew_set_median = compute_kernel(G_set_median_list + Gn_median,
- gkernel, node_label, edge_label, False)
- dhat_new_set_median_list = []
- for idx, g_tmp in enumerate(G_set_median_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
- len(G_set_median_list) + len(Gn_median) + 1),
- alpha_range, knew_set_median, withterm3=False))
-
- print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
- dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
-
-
- # compute distance between \psi and the new generated graphs.
- knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
- edge_label, False)
- dhat_new_list = []
- for idx, g_tmp in enumerate(G_gen_median_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
- len(G_gen_median_list) + len(Gn_median) + 1),
- alpha_range, knew, withterm3=False))
-
- print('\nsmallest distance in kernel space: ', dhat_new_list[0])
- dis_ks_min_list[-1].append(dhat_new_list[0])
-
-
- print('\nsods of the set median for this class:', sod_set_median_list[-1])
- print('\nsods in graph space for this class:', sod_gs_list[-1])
- print('\ndistance in kernel space of set median for this class:',
- dis_ks_set_median_list[-1])
- print('\nsmallest distances in kernel space for this class:',
- dis_ks_min_list[-1])
- print('\ntimes for this class:', time_list[-1])
-
- sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
- sod_gs_list[-1] = np.mean(sod_gs_list[-1])
- dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
- dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
- time_list[-1] = np.mean(time_list[-1])
-
- print()
- print('\nmean sods of the set median for each class:', sod_set_median_list)
- print('\nmean sods in graph space for each class:', sod_gs_list)
- print('\ndistances in kernel space of set median for each class:',
- dis_ks_set_median_list)
- print('\nmean smallest distances in kernel space for each class:',
- dis_ks_min_list)
- print('\nmean times for each class:', time_list)
-
- print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
- print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
- print('\nmean distances in kernel space of set median of all:',
- np.mean(dis_ks_set_median_list))
- print('\nmean smallest distances in kernel space of all:',
- np.mean(dis_ks_min_list))
- print('\nmean times of all:', np.mean(time_list))
-
- nb_better_sods = 0
- nb_worse_sods = 0
- nb_same_sods = 0
- for sods in sod_list_list:
- if sods[0] > sods[-1]:
- nb_better_sods += 1
- elif sods[0] < sods[-1]:
- nb_worse_sods += 1
- else:
- nb_same_sods += 1
-	print('\nIn', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
-		  'improved,', str(nb_worse_sods), 'worsened,',
-		  str(nb_same_sods), 'unchanged; fraction improved:',
-		  str(nb_better_sods / len(sod_list_list)))
-
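# Editor's sketch: the tally above classifies each IAM run by comparing the
# first (initial) and last (converged) SOD of its trace; a strictly smaller
# final value counts as an improvement. `classify_sod_traces` is a
# hypothetical helper.
def classify_sod_traces(sod_lists):
    better = sum(1 for s in sod_lists if s[0] > s[-1])
    worse = sum(1 for s in sod_lists if s[0] < s[-1])
    return better, worse, len(sod_lists) - better - worse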
-
-def test_iam_mutag():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
-
- # parameters for GED function from the IAM paper.
- # fitted edit costs.
- c_vi = 0.03523843108436513
- c_vr = 0.03347339739350128
- c_vs = 0.06871290673612238
- c_ei = 0.08591999846720685
- c_er = 0.07962086440894103
- c_es = 0.08596855855478233
- # unfitted edit costs.
-# c_vi = 3
-# c_vr = 3
-# c_vs = 1
-# c_ei = 3
-# c_er = 3
-# c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = False
- connected_iam = False
- # parameters for IAM function
-	ged_cost = 'CONSTANT'
- ged_method = 'IPFP'
- edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-# edit_cost_constant = []
- ged_stabilizer = 'min'
- ged_repeat = 50
- params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
- 'edit_cost_constant': edit_cost_constant,
- 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-
-	# classify graphs according to classes.
- time_list = []
- dis_ks_min_list = []
- dis_ks_set_median_list = []
- sod_gs_list = []
- g_best = []
- sod_set_median_list = []
- sod_list_list = []
- idx_dict = get_same_item_indices(y_all)
- for y_class in idx_dict:
- print('\n-------------------------------------------------------')
- print('class of y:', y_class)
- Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
-
- time_list.append([])
- dis_ks_min_list.append([])
- dis_ks_set_median_list.append([])
- sod_gs_list.append([])
- g_best.append([])
- sod_set_median_list.append([])
-
- for repeat in range(50):
- idx_rdm = random.sample(range(len(Gn_class)), 10)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
- Gn_candidate = [g.copy() for g in Gn_median]
-
- alpha_range = [1 / len(Gn_median)] * len(Gn_median)
- time0 = time.time()
- G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
- = iam_upgraded(Gn_median,
- Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
- epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
- params_ged=params_ged)
- time_total = time.time() - time0
- print('\ntime: ', time_total)
- time_list[-1].append(time_total)
- g_best[-1].append(G_gen_median_list[0])
- sod_set_median_list[-1].append(sod_set_median)
- print('\nsmallest sod of the set median:', sod_set_median)
- sod_gs_list[-1].append(sod_gen_median)
- print('\nsmallest sod in graph space:', sod_gen_median)
- sod_list_list.append(sod_list)
-
- # show the best graph and save it to file.
- print('one of the possible corresponding pre-images is')
- nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
- with_labels=True)
-# plt.show()
- # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
-# plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) +
-# '_repeat' + str(repeat) + '_' + str(time.time()) +
-# '.png', format="PNG")
- plt.clf()
- # print(G_gen_median_list[0].nodes(data=True))
- # print(G_gen_median_list[0].edges(data=True))
-
-
- # compute distance between \psi and the set median graph.
- knew_set_median = compute_kernel(G_set_median_list + Gn_median,
- gkernel, node_label, edge_label, False)
- dhat_new_set_median_list = []
- for idx, g_tmp in enumerate(G_set_median_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
- len(G_set_median_list) + len(Gn_median) + 1),
- alpha_range, knew_set_median, withterm3=False))
-
- print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
- dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
-
-
- # compute distance between \psi and the new generated graphs.
- knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
- edge_label, False)
- dhat_new_list = []
- for idx, g_tmp in enumerate(G_gen_median_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
- len(G_gen_median_list) + len(Gn_median) + 1),
- alpha_range, knew, withterm3=False))
-
- print('\nsmallest distance in kernel space: ', dhat_new_list[0])
- dis_ks_min_list[-1].append(dhat_new_list[0])
-
-
- print('\nsods of the set median for this class:', sod_set_median_list[-1])
- print('\nsods in graph space for this class:', sod_gs_list[-1])
- print('\ndistance in kernel space of set median for this class:',
- dis_ks_set_median_list[-1])
- print('\nsmallest distances in kernel space for this class:',
- dis_ks_min_list[-1])
- print('\ntimes for this class:', time_list[-1])
-
- sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
- sod_gs_list[-1] = np.mean(sod_gs_list[-1])
- dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
- dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
- time_list[-1] = np.mean(time_list[-1])
-
- print()
- print('\nmean sods of the set median for each class:', sod_set_median_list)
- print('\nmean sods in graph space for each class:', sod_gs_list)
- print('\ndistances in kernel space of set median for each class:',
- dis_ks_set_median_list)
- print('\nmean smallest distances in kernel space for each class:',
- dis_ks_min_list)
- print('\nmean times for each class:', time_list)
-
- print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
- print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
- print('\nmean distances in kernel space of set median of all:',
- np.mean(dis_ks_set_median_list))
- print('\nmean smallest distances in kernel space of all:',
- np.mean(dis_ks_min_list))
- print('\nmean times of all:', np.mean(time_list))
-
- nb_better_sods = 0
- nb_worse_sods = 0
- nb_same_sods = 0
- for sods in sod_list_list:
- if sods[0] > sods[-1]:
- nb_better_sods += 1
- elif sods[0] < sods[-1]:
- nb_worse_sods += 1
- else:
- nb_same_sods += 1
-	print('\nIn', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
-		  'improved,', str(nb_worse_sods), 'worsened,',
-		  str(nb_same_sods), 'unchanged; fraction improved:',
-		  str(nb_better_sods / len(sod_list_list)))
-
-
-###############################################################################
-# tests on different numbers of median-sets.
-
-def test_iam_median_nb():
-
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-	lmbda = 0.03 # termination probability
-# # parameters for GED function
-# c_vi = 0.037
-# c_vr = 0.038
-# c_vs = 0.075
-# c_ei = 0.001
-# c_er = 0.001
-# c_es = 0.0
-# ite_max_iam = 50
-# epsilon_iam = 0.001
-# removeNodes = False
-# connected_iam = False
-# # parameters for IAM function
-# ged_cost = 'CONSTANT'
-# ged_method = 'IPFP'
-# edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-# ged_stabilizer = 'min'
-# ged_repeat = 50
-# params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
-# 'edit_cost_constant': edit_cost_constant,
-# 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-
- # parameters for GED function
- c_vi = 4
- c_vr = 4
- c_vs = 2
- c_ei = 1
- c_er = 1
- c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = False
- connected_iam = False
- # parameters for IAM function
- ged_cost = 'CHEM_1'
- ged_method = 'IPFP'
- edit_cost_constant = []
- ged_stabilizer = 'min'
- ged_repeat = 50
- params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
- 'edit_cost_constant': edit_cost_constant,
- 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-
- # find out all the graphs classified to positive group 1.
- idx_dict = get_same_item_indices(y_all)
- Gn = [Gn[i] for i in idx_dict[1]]
-
-	# number of graphs whose median we want to compute.
-# nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
- nb_median_range = [len(Gn)]
-
-# # compute Gram matrix.
-# time0 = time.time()
-# km = compute_kernel(Gn, gkernel, True)
-# time_km = time.time() - time0
-# # write Gram matrix to file.
-# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
-# sod_gs_min_list = []
-# nb_updated_list = []
-# nb_updated_k_list = []
- g_best = []
- for nb_median in nb_median_range:
- print('\n-------------------------------------------------------')
- print('number of median graphs =', nb_median)
- random.seed(1)
- idx_rdm = random.sample(range(len(Gn)), nb_median)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn[idx].copy() for idx in idx_rdm]
- Gn_candidate = [g.copy() for g in Gn]
-
-# for g in Gn_median:
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-# plt.show()
-# plt.clf()
-
- ###################################################################
-# gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-# km_tmp = gmfile['gm']
-# time_km = gmfile['gmtime']
-# # modify mixed gram matrix.
-# km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-# for i in range(len(Gn)):
-# for j in range(i, len(Gn)):
-# km[i, j] = km_tmp[i, j]
-# km[j, i] = km[i, j]
-# for i in range(len(Gn)):
-# for j, idx in enumerate(idx_rdm):
-# km[i, len(Gn) + j] = km[i, idx]
-# km[len(Gn) + j, i] = km[i, idx]
-# for i, idx1 in enumerate(idx_rdm):
-# for j, idx2 in enumerate(idx_rdm):
-# km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-
- ###################################################################
- alpha_range = [1 / nb_median] * nb_median
- time0 = time.time()
- ghat_new_list, sod_min = iam_upgraded(Gn_median, Gn_candidate,
- c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
- epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
- params_ged=params_ged)
-
- time_total = time.time() - time0
- print('\ntime: ', time_total)
- time_list.append(time_total)
-
- # compute distance between \psi and the new generated graphs.
- knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
- dhat_new_list = []
- for idx, g_tmp in enumerate(ghat_new_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
- len(ghat_new_list) + len(Gn_median) + 1),
- alpha_range, knew, withterm3=False))
-
- print('\nsmallest distance in kernel space: ', dhat_new_list[0])
- dis_ks_min_list.append(dhat_new_list[0])
- g_best.append(ghat_new_list[0])
-
- # show the best graph and save it to file.
-# print('the shortest distance is', dhat)
- print('one of the possible corresponding pre-images is')
- nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
- with_labels=True)
- plt.show()
-# plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
- plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
- '.png', format="PNG")
- plt.clf()
-# print(ghat_list[0].nodes(data=True))
-# print(ghat_list[0].edges(data=True))
-
- sod_gs_list.append(sod_min)
-# sod_gs_min_list.append(np.min(sod_min))
- print('\nsmallest sod in graph space: ', sod_min)
-
- print('\nsods in graph space: ', sod_gs_list)
-# print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each set of median graphs: ',
- dis_ks_min_list)
-# print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
-# nb_updated_list)
-# print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
-# nb_updated_k_list)
- print('\ntimes:', time_list)
-
-
-def test_iam_letter_h():
- from median import draw_Letter_graph
- ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
- 'extra_params': {}} # node nsymb
-# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-# 'extra_params': {}} # node nsymb
-# Gn = Gn[0:50]
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
- gkernel = 'structuralspkernel'
-
- # parameters for GED function from the IAM paper.
- c_vi = 3
- c_vr = 3
- c_vs = 1
- c_ei = 3
- c_er = 3
- c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = False
- connected_iam = False
- # parameters for IAM function
-# ged_cost = 'CONSTANT'
- ged_cost = 'LETTER'
- ged_method = 'IPFP'
-# edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
- edit_cost_constant = []
- ged_stabilizer = 'min'
- ged_repeat = 50
- params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
- 'edit_cost_constant': edit_cost_constant,
- 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-
- # classify graphs according to letters.
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- g_best = []
- sod_set_median_list = []
- idx_dict = get_same_item_indices(y_all)
- for letter in idx_dict:
- print('\n-------------------------------------------------------')
- print('letter', letter)
- Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
-
- time_list.append([])
- dis_ks_min_list.append([])
- sod_gs_list.append([])
- g_best.append([])
- sod_set_median_list.append([])
-
- for repeat in range(50):
- idx_rdm = random.sample(range(len(Gn_let)), 50)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
- Gn_candidate = [g.copy() for g in Gn_median]
-
- alpha_range = [1 / len(Gn_median)] * len(Gn_median)
- time0 = time.time()
- ghat_new_list, sod_min, sod_set_median = iam_upgraded(Gn_median,
- Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
- epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
- params_ged=params_ged)
- time_total = time.time() - time0
- print('\ntime: ', time_total)
- time_list[-1].append(time_total)
- g_best[-1].append(ghat_new_list[0])
- sod_set_median_list[-1].append(sod_set_median)
- print('\nsmallest sod of the set median:', sod_set_median)
- sod_gs_list[-1].append(sod_min)
- print('\nsmallest sod in graph space:', sod_min)
-
- # show the best graph and save it to file.
- print('one of the possible corresponding pre-images is')
- draw_Letter_graph(ghat_new_list[0], savepath='results/iam/paper_compare/')
-
- # compute distance between \psi and the new generated graphs.
- knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
- dhat_new_list = []
- for idx, g_tmp in enumerate(ghat_new_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
- len(ghat_new_list) + len(Gn_median) + 1),
- alpha_range, knew, withterm3=False))
-
- print('\nsmallest distance in kernel space: ', dhat_new_list[0])
- dis_ks_min_list[-1].append(dhat_new_list[0])
-
- print('\nsods of the set median for this letter:', sod_set_median_list[-1])
- print('\nsods in graph space for this letter:', sod_gs_list[-1])
- print('\nsmallest distances in kernel space for this letter:',
- dis_ks_min_list[-1])
- print('\ntimes for this letter:', time_list[-1])
-
- sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
- sod_gs_list[-1] = np.mean(sod_gs_list[-1])
- dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
- time_list[-1] = np.mean(time_list[-1])
-
- print('\nmean sods of the set median for each letter:', sod_set_median_list)
- print('\nmean sods in graph space for each letter:', sod_gs_list)
- print('\nmean smallest distances in kernel space for each letter:',
- dis_ks_min_list)
- print('\nmean times for each letter:', time_list)
-
- print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
- print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
- print('\nmean smallest distances in kernel space of all:',
- np.mean(dis_ks_min_list))
- print('\nmean times of all:', np.mean(time_list))
-
-
-
-
-
-
-
-
-
-def test_iam_fitdistance():
-
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
-# remove_edges(Gn)
- gkernel = 'marginalizedkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
-
-#	lmbda = 0.03 # termination probability
-# # parameters for GED function
-# c_vi = 0.037
-# c_vr = 0.038
-# c_vs = 0.075
-# c_ei = 0.001
-# c_er = 0.001
-# c_es = 0.0
-# ite_max_iam = 50
-# epsilon_iam = 0.001
-# removeNodes = False
-# connected_iam = False
-# # parameters for IAM function
-# ged_cost = 'CONSTANT'
-# ged_method = 'IPFP'
-# edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-# ged_stabilizer = 'min'
-# ged_repeat = 50
-# params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
-# 'edit_cost_constant': edit_cost_constant,
-# 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-
- # parameters for GED function
- c_vi = 4
- c_vr = 4
- c_vs = 2
- c_ei = 1
- c_er = 1
- c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = False
- connected_iam = False
- # parameters for IAM function
- ged_cost = 'CHEM_1'
- ged_method = 'IPFP'
- edit_cost_constant = []
- ged_stabilizer = 'min'
- ged_repeat = 50
- params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
- 'edit_cost_constant': edit_cost_constant,
- 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-
- # find out all the graphs classified to positive group 1.
- idx_dict = get_same_item_indices(y_all)
- Gn = [Gn[i] for i in idx_dict[1]]
-
-	# number of graphs whose median we want to compute.
-# nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
- nb_median_range = [10]
-
-# # compute Gram matrix.
-# time0 = time.time()
-# km = compute_kernel(Gn, gkernel, True)
-# time_km = time.time() - time0
-# # write Gram matrix to file.
-# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-
- time_list = []
- dis_ks_min_list = []
- dis_ks_gen_median_list = []
- sod_gs_list = []
-# sod_gs_min_list = []
-# nb_updated_list = []
-# nb_updated_k_list = []
- g_best = []
- for nb_median in nb_median_range:
- print('\n-------------------------------------------------------')
- print('number of median graphs =', nb_median)
- random.seed(1)
- idx_rdm = random.sample(range(len(Gn)), nb_median)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn[idx].copy() for idx in idx_rdm]
- Gn_candidate = [g.copy() for g in Gn_median]
-
-# for g in Gn_median:
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-# plt.show()
-# plt.clf()
-
- ###################################################################
-# gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-# km_tmp = gmfile['gm']
-# time_km = gmfile['gmtime']
-# # modify mixed gram matrix.
-# km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-# for i in range(len(Gn)):
-# for j in range(i, len(Gn)):
-# km[i, j] = km_tmp[i, j]
-# km[j, i] = km[i, j]
-# for i in range(len(Gn)):
-# for j, idx in enumerate(idx_rdm):
-# km[i, len(Gn) + j] = km[i, idx]
-# km[len(Gn) + j, i] = km[i, idx]
-# for i, idx1 in enumerate(idx_rdm):
-# for j, idx2 in enumerate(idx_rdm):
-# km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-
- ###################################################################
- alpha_range = [1 / nb_median] * nb_median
- time0 = time.time()
- G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
- = iam_upgraded(Gn_median, Gn_candidate,
- c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
- epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
- params_ged=params_ged)
-
- time_total = time.time() - time0
- print('\ntime: ', time_total)
- time_list.append(time_total)
-
- # compute distance between \psi and the new generated graphs.
- knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
- edge_label, False)
- dhat_new_list = []
- for idx, g_tmp in enumerate(G_gen_median_list):
- # @todo: the term3 below could use the one at the beginning of the function.
- dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
- len(G_gen_median_list) + len(Gn_median) + 1),
- alpha_range, knew, withterm3=False))
-
- print('\nsmallest distance in kernel space: ', dhat_new_list[0])
- dis_ks_min_list.append(dhat_new_list[0])
- g_best.append(G_gen_median_list[0])
-
- # show the best graph and save it to file.
-# print('the shortest distance is', dhat)
- print('one of the possible corresponding pre-images is')
- nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
- with_labels=True)
- plt.show()
-# plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
-# plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
-# '.png', format="PNG")
- plt.clf()
-# print(ghat_list[0].nodes(data=True))
-# print(ghat_list[0].edges(data=True))
-
- sod_gs_list.append(sod_gen_median)
-# sod_gs_min_list.append(np.min(sod_gen_median))
- print('\nsmallest sod in graph space: ', sod_gen_median)
- print('\nsmallest sod of set median in graph space: ', sod_set_median)
-
- print('\nsods in graph space: ', sod_gs_list)
-# print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each set of median graphs: ',
- dis_ks_min_list)
-# print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
-# nb_updated_list)
-# print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
-# nb_updated_k_list)
- print('\ntimes:', time_list)
-
-
-
-
-
-###############################################################################
-
-
-if __name__ == '__main__':
-###############################################################################
-# tests on different numbers of median-sets.
-# test_iam_median_nb()
-# test_iam_letter_h()
-# test_iam_monoterpenoides()
-# test_iam_mutag()
-
-# test_iam_fitdistance()
-# print("test log")
-
- test_iam_monoterpenoides_with_init40()
diff --git a/gklearn/preimage/test_k_closest_graphs.py b/gklearn/preimage/test_k_closest_graphs.py
deleted file mode 100644
index 56971c7..0000000
--- a/gklearn/preimage/test_k_closest_graphs.py
+++ /dev/null
@@ -1,462 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Dec 16 11:53:54 2019
-
-@author: ljia
-"""
-import numpy as np
-import math
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-import os
-import sys
-from tqdm import tqdm
-from itertools import combinations, islice
-import multiprocessing
-from multiprocessing import Pool
-from functools import partial
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL
-#from gklearn.utils.logger2file import *
-from gklearn.preimage.iam import iam_upgraded, iam_bash
-from gklearn.preimage.utils import compute_kernel, dis_gstar, kernel_distance_matrix
-from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
-#from gklearn.preimage.ged import ged_median
-
-
-def fit_edit_cost_constants(fit_method, edit_cost_name,
- edit_cost_constants=None, initial_solutions=1,
- Gn_median=None, node_label=None, edge_label=None,
- gkernel=None, dataset=None, init_ecc=None,
- Gn=None, Kmatrix_median=None):
- """fit edit cost constants.
- """
- if fit_method == 'random': # random
- if edit_cost_name == 'LETTER':
- edit_cost_constants = random.sample(range(1, 10), 3)
- edit_cost_constants = [item * 0.1 for item in edit_cost_constants]
- elif edit_cost_name == 'LETTER2':
- random.seed(time.time())
- edit_cost_constants = random.sample(range(1, 10), 5)
-# edit_cost_constants = [item * 0.1 for item in edit_cost_constants]
- elif edit_cost_name == 'NON_SYMBOLIC':
- edit_cost_constants = random.sample(range(1, 10), 6)
- if Gn_median[0].graph['node_attrs'] == []:
- edit_cost_constants[2] = 0
- if Gn_median[0].graph['edge_attrs'] == []:
- edit_cost_constants[5] = 0
- else:
- edit_cost_constants = random.sample(range(1, 10), 6)
- print('edit cost constants used:', edit_cost_constants)
- elif fit_method == 'expert': # expert
- if init_ecc is None:
- if edit_cost_name == 'LETTER':
- edit_cost_constants = [0.9, 1.7, 0.75]
- elif edit_cost_name == 'LETTER2':
- edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
- else:
- edit_cost_constants = [3, 3, 1, 3, 3, 1]
- else:
- edit_cost_constants = init_ecc
- elif fit_method == 'k-graphs':
- itr_max = 6
- if init_ecc is None:
- if edit_cost_name == 'LETTER':
- init_costs = [0.9, 1.7, 0.75]
- elif edit_cost_name == 'LETTER2':
- init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
- elif edit_cost_name == 'NON_SYMBOLIC':
- init_costs = [0, 0, 1, 1, 1, 0]
- if Gn_median[0].graph['node_attrs'] == []:
- init_costs[2] = 0
- if Gn_median[0].graph['edge_attrs'] == []:
- init_costs[5] = 0
- else:
- init_costs = [3, 3, 1, 3, 3, 1]
- else:
- init_costs = init_ecc
- algo_options = '--threads 1 --initial-solutions ' \
- + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
- params_ged = {'lib': 'gedlibpy', 'cost': edit_cost_name, 'method': 'IPFP',
- 'algo_options': algo_options, 'stabilizer': None}
- # fit on k-graph subset
- edit_cost_constants, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn_median,
- node_label, edge_label, gkernel, itr_max, params_ged=params_ged,
- init_costs=init_costs, dataset=dataset, Kmatrix=Kmatrix_median,
- parallel=True)
- elif fit_method == 'whole-dataset':
- itr_max = 6
- if init_ecc is None:
- if edit_cost_name == 'LETTER':
- init_costs = [0.9, 1.7, 0.75]
- elif edit_cost_name == 'LETTER2':
- init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
- else:
- init_costs = [3, 3, 1, 3, 3, 1]
- else:
- init_costs = init_ecc
- algo_options = '--threads 1 --initial-solutions ' \
- + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
- params_ged = {'lib': 'gedlibpy', 'cost': edit_cost_name, 'method': 'IPFP',
- 'algo_options': algo_options, 'stabilizer': None}
- # fit on all subset
- edit_cost_constants, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn,
- node_label, edge_label, gkernel, itr_max, params_ged=params_ged,
- init_costs=init_costs, dataset=dataset, parallel=True)
- elif fit_method == 'precomputed':
- pass
-
- return edit_cost_constants
-
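# Editor's usage sketch: the 'expert' strategy needs no data, so it is the
# cheapest way to exercise the dispatch above (values from the LETTER branch).
ecc = fit_edit_cost_constants('expert', 'LETTER')  # -> [0.9, 1.7, 0.75]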
-
-def compute_distances_to_true_median(Gn_median, fname_sm, fname_gm,
- gkernel, edit_cost_name,
- Kmatrix_median=None):
- # reform graphs.
- set_median = loadGXL(fname_sm)
- gen_median = loadGXL(fname_gm)
-# print(gen_median.nodes(data=True))
-# print(gen_median.edges(data=True))
-	if edit_cost_name in ('LETTER', 'LETTER2', 'NON_SYMBOLIC'):
-# dataset == 'Fingerprint':
-# for g in Gn_median:
-# reform_attributes(g)
- reform_attributes(set_median, Gn_median[0].graph['node_attrs'],
- Gn_median[0].graph['edge_attrs'])
- reform_attributes(gen_median, Gn_median[0].graph['node_attrs'],
- Gn_median[0].graph['edge_attrs'])
-
-	if edit_cost_name in ('LETTER', 'LETTER2', 'NON_SYMBOLIC'):
- node_label = None
- edge_label = None
- else:
- node_label = 'chem'
- edge_label = 'valence'
-
- # compute Gram matrix for median set.
- if Kmatrix_median is None:
- Kmatrix_median = compute_kernel(Gn_median, gkernel, node_label, edge_label, False)
-
- # compute distance in kernel space for set median.
- kernel_sm = []
- for G_median in Gn_median:
- km_tmp = compute_kernel([set_median, G_median], gkernel, node_label, edge_label, False)
- kernel_sm.append(km_tmp[0, 1])
- Kmatrix_sm = np.concatenate((np.array([kernel_sm]), np.copy(Kmatrix_median)), axis=0)
- Kmatrix_sm = np.concatenate((np.array([[km_tmp[0, 0]] + kernel_sm]).T, Kmatrix_sm), axis=1)
-# Kmatrix_sm = compute_kernel([set_median] + Gn_median, gkernel,
-# node_label, edge_label, False)
- dis_k_sm = dis_gstar(0, range(1, 1+len(Gn_median)),
- [1 / len(Gn_median)] * len(Gn_median), Kmatrix_sm, withterm3=False)
-# print(gen_median.nodes(data=True))
-# print(gen_median.edges(data=True))
-# print(set_median.nodes(data=True))
-# print(set_median.edges(data=True))
-
- # compute distance in kernel space for generalized median.
- kernel_gm = []
- for G_median in Gn_median:
- km_tmp = compute_kernel([gen_median, G_median], gkernel, node_label, edge_label, False)
- kernel_gm.append(km_tmp[0, 1])
- Kmatrix_gm = np.concatenate((np.array([kernel_gm]), np.copy(Kmatrix_median)), axis=0)
- Kmatrix_gm = np.concatenate((np.array([[km_tmp[0, 0]] + kernel_gm]).T, Kmatrix_gm), axis=1)
-# Kmatrix_gm = compute_kernel([gen_median] + Gn_median, gkernel,
-# node_label, edge_label, False)
- dis_k_gm = dis_gstar(0, range(1, 1+len(Gn_median)),
- [1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False)
-
- # compute distance in kernel space for each graph in median set.
- dis_k_gi = []
- for idx in range(len(Gn_median)):
- dis_k_gi.append(dis_gstar(idx+1, range(1, 1+len(Gn_median)),
- [1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False))
-
- print('dis_k_sm:', dis_k_sm)
- print('dis_k_gm:', dis_k_gm)
- print('dis_k_gi:', dis_k_gi)
- idx_dis_k_gi_min = np.argmin(dis_k_gi)
- dis_k_gi_min = dis_k_gi[idx_dis_k_gi_min]
- print('min dis_k_gi:', dis_k_gi_min)
-
- return dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min
-
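# Editor's sketch of the kernel-space distance computed by dis_gstar above:
# with uniform weights alpha_i = 1/n and graph g stored at index 0 of the
# augmented Gram matrix K, the squared distance to the mean of the median set
# is K[0,0] - (2/n)*sum_i K[0,i] + (1/n^2)*sum_{i,j} K[i,j]; the last term is
# the "term3" that the calls above omit. `dis_to_mean` is a hypothetical
# stand-in, not the library function.
import numpy as np

def dis_to_mean(K, withterm3=True):
    n = K.shape[0] - 1
    alpha = np.full(n, 1.0 / n)
    d2 = K[0, 0] - 2 * alpha @ K[0, 1:]
    if withterm3:
        d2 += alpha @ K[1:, 1:] @ alpha
    return np.sqrt(max(d2, 0.0))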
-
-def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_method,
- graph_dir=None, initial_solutions=1,
- edit_cost_constants=None, group_min=None,
- dataset=None, edit_cost_name=None, init_ecc=None,
- Kmatrix=None, parallel=True):
-# dataset = dataset.lower()
-
-# # compute distances in kernel space.
-# dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
-# Kmatrix=None, gkernel=gkernel)
-# # ged.
-# gmfile = np.load('results/test_k_closest_graphs/ged_mat.fit_on_whole_dataset.with_medians.gm.npz')
-# ged_mat = gmfile['ged_mat']
-# dis_mat = ged_mat[0:len(Gn), 0:len(Gn)]
-
-# # choose k closest graphs
-# time0 = time.time()
-# sod_ks_min, group_min = get_closest_k_graphs(dis_mat, k, parallel)
-# time_spent = time.time() - time0
-# print('closest graphs:', sod_ks_min, group_min)
-# print('time spent:', time_spent)
-# group_min = (12, 13, 22, 29) # closest w.r.t path kernel
-# group_min = (77, 85, 160, 171) # closest w.r.t ged
-# group_min = (0,1,2,3,4,5,6,7,8,9,10,11) # closest w.r.t treelet kernel
- Gn_median = [Gn[g].copy() for g in group_min]
- if Kmatrix is not None:
- Kmatrix_median = np.copy(Kmatrix[group_min,:])
- Kmatrix_median = Kmatrix_median[:,group_min]
- else:
- Kmatrix_median = None
-
-
- # 1. fit edit cost constants.
- time0 = time.time()
- edit_cost_constants = fit_edit_cost_constants(fit_method, edit_cost_name,
- edit_cost_constants=edit_cost_constants, initial_solutions=initial_solutions,
- Gn_median=Gn_median, node_label=node_label, edge_label=edge_label,
- gkernel=gkernel, dataset=dataset, init_ecc=init_ecc,
- Gn=Gn, Kmatrix_median=Kmatrix_median)
- time_fitting = time.time() - time0
-
-
- # 2. compute set median and gen median using IAM (C++ through bash).
- print('\nstart computing set median and gen median using IAM (C++ through bash)...\n')
- group_fnames = [Gn[g].graph['filename'] for g in group_min]
- time0 = time.time()
- sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constants,
- cost=edit_cost_name, initial_solutions=initial_solutions,
- graph_dir=graph_dir, dataset=dataset)
- time_generating = time.time() - time0
- print('\nmedians computed.\n')
-
-
- # 3. compute distances to the true median.
- print('\nstart computing distances to true median....\n')
- Gn_median = [Gn[g].copy() for g in group_min]
- dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min = \
- compute_distances_to_true_median(Gn_median, fname_sm, fname_gm,
- gkernel, edit_cost_name,
- Kmatrix_median=Kmatrix_median)
- idx_dis_k_gi_min = group_min[idx_dis_k_gi_min]
- print('index min dis_k_gi:', idx_dis_k_gi_min)
- print('sod_sm:', sod_sm)
- print('sod_gm:', sod_gm)
-
- # collect return values.
- return (sod_sm, sod_gm), \
- (dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min), \
- (time_fitting, time_generating)
-
-
-def reform_attributes(G, na_names=[], ea_names=[]):
-	if na_names:
-		for node in G.nodes:
-			G.nodes[node]['attributes'] = [G.nodes[node][a_name] for a_name in na_names]
-	if ea_names:
-		for edge in G.edges:
-			G.edges[edge]['attributes'] = [G.edges[edge][a_name] for a_name in ea_names]
-
-
-def get_closest_k_graphs(dis_mat, k, parallel):
- k_graph_groups = combinations(range(0, len(dis_mat)), k)
- sod_ks_min = np.inf
- if parallel:
- len_combination = get_combination_length(len(dis_mat), k)
- len_itr_max = int(len_combination if len_combination < 1e7 else 1e7)
-# pos_cur = 0
- graph_groups_slices = split_iterable(k_graph_groups, len_itr_max, len_combination)
- for graph_groups_cur in graph_groups_slices:
-# while True:
-# graph_groups_cur = islice(k_graph_groups, pos_cur, pos_cur + len_itr_max)
- graph_groups_cur_list = list(graph_groups_cur)
- print('current position:', graph_groups_cur_list[0])
- len_itr_cur = len(graph_groups_cur_list)
-# if len_itr_cur < len_itr_max:
-# break
-
- itr = zip(graph_groups_cur_list, range(0, len_itr_cur))
- sod_k_list = np.empty(len_itr_cur)
- graphs_list = [None] * len_itr_cur
-			n_jobs = multiprocessing.cpu_count()
-			chunksize = int(len_itr_max / n_jobs + 1)
- def init_worker(dis_mat_toshare):
- global G_dis_mat
- G_dis_mat = dis_mat_toshare
- pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(dis_mat,))
-# iterator = tqdm(pool.imap_unordered(_get_closest_k_graphs_parallel,
-# itr, chunksize),
-# desc='Choosing k closest graphs', file=sys.stdout)
- iterator = pool.imap_unordered(_get_closest_k_graphs_parallel, itr, chunksize)
- for graphs, i, sod_ks in iterator:
- sod_k_list[i] = sod_ks
- graphs_list[i] = graphs
- pool.close()
- pool.join()
-
- arg_min = np.argmin(sod_k_list)
- sod_ks_cur = sod_k_list[arg_min]
- group_cur = graphs_list[arg_min]
- if sod_ks_cur < sod_ks_min:
- sod_ks_min = sod_ks_cur
- group_min = group_cur
- print('get closer graphs:', sod_ks_min, group_min)
- else:
- for items in tqdm(k_graph_groups, desc='Choosing k closest graphs', file=sys.stdout):
- k_graph_pairs = combinations(items, 2)
- sod_ks = 0
- for i1, i2 in k_graph_pairs:
- sod_ks += dis_mat[i1, i2]
- if sod_ks < sod_ks_min:
- sod_ks_min = sod_ks
- group_min = items
- print('get closer graphs:', sod_ks_min, group_min)
-
- return sod_ks_min, group_min
-
-
-def _get_closest_k_graphs_parallel(itr):
- k_graph_pairs = combinations(itr[0], 2)
- sod_ks = 0
- for i1, i2 in k_graph_pairs:
- sod_ks += G_dis_mat[i1, i2]
-
- return itr[0], itr[1], sod_ks
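-# _get_closest_k_graphs_parallel reads G_dis_mat, which init_worker above
-# binds as a module-level global in each worker process, so the distance
-# matrix is shared with the workers once instead of being shipped with every
-# task. A self-contained sketch of the same initializer pattern (the names
-# below are illustrative only, not part of this library):
-#
-#     from multiprocessing import Pool
-#
-#     def _init(shared):
-#         global _SHARED
-#         _SHARED = shared
-#
-#     def _double(i):
-#         return _SHARED[i] * 2
-#
-#     if __name__ == '__main__':
-#         with Pool(initializer=_init, initargs=([1, 2, 3],)) as pool:
-#             print(pool.map(_double, range(3)))  # -> [2, 4, 6]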
-
-
-def split_iterable(iterable, n, len_iter):
- it = iter(iterable)
- for i in range(0, len_iter, n):
- piece = islice(it, n)
- yield piece
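-# split_iterable lazily cuts an iterator into islice chunks of at most n
-# items; it is used above to bound how many k-combinations are materialized
-# at once. For example:
-#
-#     list(map(list, split_iterable(range(7), 3, 7)))
-#     # -> [[0, 1, 2], [3, 4, 5], [6]]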
-
-
-def get_combination_length(n, k):
- len_combination = 1
- for i in range(n, n - k, -1):
- len_combination *= i
- return int(len_combination / math.factorial(k))
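-# get_combination_length(n, k) evaluates the binomial coefficient
-# C(n, k) = n * (n-1) * ... * (n-k+1) / k!, e.g. C(5, 2) = 5*4/2 = 10.
-# On Python >= 3.8 the same value is given by math.comb(n, k).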
-
-
-###############################################################################
-
-def test_k_closest_graphs():
- ds = {'name': 'monoterpenoides',
- 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
-# gkernel = 'untilhpathkernel'
-# gkernel = 'weisfeilerlehmankernel'
- gkernel = 'treeletkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
-
- k = 5
- edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
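-    # Note: edit_costs above is only consumed by the commented-out
-    # 'precomputed' variant below; the active call uses the 'expert'
-    # fit method, which ignores it.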
-
-# sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
-# = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
-# 'precomputed', edit_costs=edit_costs,
-## 'k-graphs',
-# parallel=False)
-#
-# sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
-# = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
-# 'expert', parallel=False)
-
- sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
- = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
- 'expert', parallel=False)
- return
-
-
-def test_k_closest_graphs_with_cv():
- gkernel = 'untilhpathkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
-
- k = 4
-
- y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
- repeats = 50
- collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
- graph_dir = collection_path + 'gxl/'
-
- sod_sm_list = []
- sod_gm_list = []
- dis_k_sm_list = []
- dis_k_gm_list = []
- dis_k_gi_min_list = []
- for y in y_all:
- print('\n-------------------------------------------------------')
- print('class of y:', y)
-
- sod_sm_list.append([])
- sod_gm_list.append([])
- dis_k_sm_list.append([])
- dis_k_gm_list.append([])
- dis_k_gi_min_list.append([])
-
- for repeat in range(repeats):
- print('\nrepeat ', repeat)
- collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
- Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
- sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
- = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel,
- k, 'whole-dataset', graph_dir=graph_dir,
- parallel=False)
-
- sod_sm_list[-1].append(sod_sm)
- sod_gm_list[-1].append(sod_gm)
- dis_k_sm_list[-1].append(dis_k_sm)
- dis_k_gm_list[-1].append(dis_k_gm)
- dis_k_gi_min_list[-1].append(dis_k_gi_min)
-
- print('\nsods of the set median for this class:', sod_sm_list[-1])
- print('\nsods of the gen median for this class:', sod_gm_list[-1])
- print('\ndistances in kernel space of set median for this class:',
- dis_k_sm_list[-1])
- print('\ndistances in kernel space of gen median for this class:',
- dis_k_gm_list[-1])
- print('\ndistances in kernel space of min graph for this class:',
- dis_k_gi_min_list[-1])
-
- sod_sm_list[-1] = np.mean(sod_sm_list[-1])
- sod_gm_list[-1] = np.mean(sod_gm_list[-1])
- dis_k_sm_list[-1] = np.mean(dis_k_sm_list[-1])
- dis_k_gm_list[-1] = np.mean(dis_k_gm_list[-1])
- dis_k_gi_min_list[-1] = np.mean(dis_k_gi_min_list[-1])
-
- print()
- print('\nmean sods of the set median for each class:', sod_sm_list)
- print('\nmean sods of the gen median for each class:', sod_gm_list)
- print('\nmean distance in kernel space of set median for each class:',
- dis_k_sm_list)
- print('\nmean distances in kernel space of gen median for each class:',
- dis_k_gm_list)
- print('\nmean distances in kernel space of min graph for each class:',
- dis_k_gi_min_list)
-
- print('\nmean sods of the set median of all:', np.mean(sod_sm_list))
- print('\nmean sods of the gen median of all:', np.mean(sod_gm_list))
- print('\nmean distances in kernel space of set median of all:',
- np.mean(dis_k_sm_list))
- print('\nmean distances in kernel space of gen median of all:',
- np.mean(dis_k_gm_list))
- print('\nmean distances in kernel space of min graph of all:',
- np.mean(dis_k_gi_min_list))
-
- return
-
-
-if __name__ == '__main__':
- test_k_closest_graphs()
-# test_k_closest_graphs_with_cv()
\ No newline at end of file
diff --git a/gklearn/preimage/test_median_preimage_generator.py b/gklearn/preimage/test_median_preimage_generator.py
deleted file mode 100644
index 2f458af..0000000
--- a/gklearn/preimage/test_median_preimage_generator.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Mar 27 17:30:55 2020
-
-@author: ljia
-"""
-import multiprocessing
-import functools
-from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
-from gklearn.preimage import MedianPreimageGenerator
-from gklearn.utils import Dataset
-
-
-def test_median_preimage_generator():
-
- # 1. set parameters.
- print('1. setting parameters...')
- ds_name = 'Letter-high'
- mpg = MedianPreimageGenerator()
- mpg_options = {'fit_method': 'k-graphs',
- 'init_ecc': [3, 3, 1, 3, 3],
- 'ds_name': 'Letter-high',
- 'parallel': True,
- 'time_limit_in_sec': 0,
- 'max_itrs': 100,
- 'max_itrs_without_update': 3,
- 'epsilon_ratio': 0.01,
- 'verbose': 2}
- mpg.set_options(**mpg_options)
- mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
- sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
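-    # mixkernel pairs the two base kernels (kernelproduct of deltakernel for
-    # symbolic labels and gaussiankernel for numeric ones); the sub_kernels
-    # dict lets the graph kernel pick the right one per attribute type
-    # ('symb' / 'nsymb') or the product for mixed attributes.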
- mpg.kernel_options = {'name': 'structuralspkernel',
- 'edge_weight': None,
- 'node_kernels': sub_kernels,
- 'edge_kernels': sub_kernels,
- 'compute_method': 'naive',
- 'parallel': 'imap_unordered',
-# 'parallel': None,
- 'n_jobs': multiprocessing.cpu_count(),
- 'normalize': True,
- 'verbose': 2}
- mpg.ged_options = {'method': 'IPFP',
- 'initial_solutions': 40,
- 'edit_cost': 'LETTER2',
- 'attr_distance': 'euclidean',
- 'ratio_runs_from_initial_solutions': 1,
- 'threads': multiprocessing.cpu_count(),
- 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
- mpg.mge_options = {'init_type': 'MEDOID',
- 'random_inits': 10,
- 'time_limit': 600,
- 'verbose': 2,
- 'refine': False}
-
-
- # 2. get dataset.
- print('2. getting dataset...')
- mpg.dataset = Dataset()
- mpg.dataset.load_predefined_dataset(ds_name)
- mpg.dataset.cut_graphs(range(0, 10))
-
- # 3. compute median preimage.
- print('3. computing median preimage...')
- mpg.run()
-
-
-if __name__ == '__main__':
- test_median_preimage_generator()
\ No newline at end of file
diff --git a/gklearn/preimage/test_others.py b/gklearn/preimage/test_others.py
deleted file mode 100644
index a277a17..0000000
--- a/gklearn/preimage/test_others.py
+++ /dev/null
@@ -1,686 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jul 4 12:20:16 2019
-
-@author: ljia
-"""
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import sys
-import time
-from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.median import draw_Letter_graph
-from gklearn.preimage.ged import GED, ged_median
-from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \
- dis_gstar, remove_edges
-
-
-# --------------------------- These are tests --------------------------------#
-
-def test_who_is_the_closest_in_kernel_space(Gn):
- idx_gi = [0, 6]
- g1 = Gn[idx_gi[0]]
- g2 = Gn[idx_gi[1]]
- # create the "median" graph.
- gnew = g2.copy()
- gnew.remove_node(0)
- nx.draw_networkx(gnew)
- plt.show()
- print(gnew.nodes(data=True))
- Gn = [gnew] + Gn
-
- # compute gram matrix
- Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True)
- # the distance matrix
- dmatrix = gram2distances(Kmatrix)
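-    # gram2distances maps the Gram matrix K to the kernel-induced metric
-    # d(g, h) = sqrt(K[g, g] + K[h, h] - 2 * K[g, h]), i.e. the Euclidean
-    # distance between the graphs' images in kernel feature space.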
- print(np.sort(dmatrix[idx_gi[0] + 1]))
- print(np.argsort(dmatrix[idx_gi[0] + 1]))
- print(np.sort(dmatrix[idx_gi[1] + 1]))
- print(np.argsort(dmatrix[idx_gi[1] + 1]))
- # for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2
- dis_median = [(dmatrix[i, idx_gi[0] + 1] + dmatrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
- print(np.sort(dis_median))
- print(np.argsort(dis_median))
- return
-
-
-def test_who_is_the_closest_in_GED_space(Gn):
- idx_gi = [0, 6]
- g1 = Gn[idx_gi[0]]
- g2 = Gn[idx_gi[1]]
- # create the "median" graph.
- gnew = g2.copy()
- gnew.remove_node(0)
- nx.draw_networkx(gnew)
- plt.show()
- print(gnew.nodes(data=True))
- Gn = [gnew] + Gn
-
- # compute GEDs
- ged_matrix = np.zeros((len(Gn), len(Gn)))
- for i1 in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
- for i2 in range(len(Gn)):
- dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib')
- ged_matrix[i1, i2] = dis
- print(np.sort(ged_matrix[idx_gi[0] + 1]))
- print(np.argsort(ged_matrix[idx_gi[0] + 1]))
- print(np.sort(ged_matrix[idx_gi[1] + 1]))
- print(np.argsort(ged_matrix[idx_gi[1] + 1]))
- # for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2
- dis_median = [(ged_matrix[i, idx_gi[0] + 1] + ged_matrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
- print(np.sort(dis_median))
- print(np.argsort(dis_median))
- return
-
-
-def test_will_IAM_give_the_median_graph_we_wanted(Gn):
- idx_gi = [0, 6]
- g1 = Gn[idx_gi[0]].copy()
- g2 = Gn[idx_gi[1]].copy()
-# del Gn[idx_gi[0]]
-# del Gn[idx_gi[1] - 1]
- g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
-# g_median = test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1)
- nx.draw_networkx(g_median)
- plt.show()
- print(g_median.nodes(data=True))
- print(g_median.edges(data=True))
-
-
-def test_new_IAM_allGraph_deleteNodes(Gn):
- idx_gi = [0, 6]
-# g1 = Gn[idx_gi[0]].copy()
-# g2 = Gn[idx_gi[1]].copy()
-
-# g1 = nx.Graph(name='haha')
-# g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
-# g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
-# g2 = nx.Graph(name='hahaha')
-# g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
-# (3, {'atom': 'O'}), (4, {'atom': 'C'})])
-# g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
-# (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
-
- g1 = nx.Graph(name='haha')
- g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
- (3, {'atom': 'S'}), (4, {'atom': 'S'})])
- g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
- (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
- g2 = nx.Graph(name='hahaha')
- g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
- (3, {'atom': 'O'}), (4, {'atom': 'O'})])
- g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
- (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
-
-# g2 = g1.copy()
-# g2.add_nodes_from([(3, {'atom': 'O'})])
-# g2.add_nodes_from([(4, {'atom': 'C'})])
-# g2.add_edges_from([(1, 3, {'bond_type': '1'})])
-# g2.add_edges_from([(3, 4, {'bond_type': '1'})])
-
-# del Gn[idx_gi[0]]
-# del Gn[idx_gi[1] - 1]
-
- nx.draw_networkx(g1)
- plt.show()
- print(g1.nodes(data=True))
- print(g1.edges(data=True))
- nx.draw_networkx(g2)
- plt.show()
- print(g2.nodes(data=True))
- print(g2.edges(data=True))
-
- g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
-# g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(Gn, Gn, c_ei=1, c_er=1, c_es=1)
- nx.draw_networkx(g_median)
- plt.show()
- print(g_median.nodes(data=True))
- print(g_median.edges(data=True))
-
-
-def test_the_simple_two(Gn, gkernel):
- from gk_iam import gk_iam_nearest_multi
-    lmbda = 0.03 # termination probability
- r_max = 10 # recursions
- l = 500
- alpha_range = np.linspace(0.5, 0.5, 1)
- k = 2 # k nearest neighbors
-
- # randomly select two molecules
- np.random.seed(1)
- idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
- g1 = Gn[idx_gi[0]]
- g2 = Gn[idx_gi[1]]
- Gn_mix = [g.copy() for g in Gn]
- Gn_mix.append(g1.copy())
- Gn_mix.append(g2.copy())
-
-# g_tmp = iam([g1, g2])
-# nx.draw_networkx(g_tmp)
-# plt.show()
-
- # compute
-# k_list = [] # kernel between each graph and itself.
-# k_g1_list = [] # kernel between each graph and g1
-# k_g2_list = [] # kernel between each graph and g2
-# for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout):
-# ktemp = compute_kernel([g, g1, g2], 'marginalizedkernel', False)
-# k_list.append(ktemp[0][0, 0])
-# k_g1_list.append(ktemp[0][0, 1])
-# k_g2_list.append(ktemp[0][0, 2])
-
- km = compute_kernel(Gn_mix, gkernel, True)
-# k_list = np.diag(km) # kernel between each graph and itself.
-# k_g1_list = km[idx_gi[0]] # kernel between each graph and g1
-# k_g2_list = km[idx_gi[1]] # kernel between each graph and g2
-
- g_best = []
- dis_best = []
- # for each alpha
- for alpha in alpha_range:
- print('alpha =', alpha)
- dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
- range(len(Gn), len(Gn) + 2), km,
-                                             k, r_max, gkernel)
- dis_best.append(dhat)
- g_best.append(ghat_list)
-
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_best[idx])
- print('the corresponding pre-images are')
- for g in g_best[idx]:
- nx.draw_networkx(g)
- plt.show()
- print(g.nodes(data=True))
- print(g.edges(data=True))
-
-
-def test_remove_bests(Gn, gkernel):
- from gk_iam import gk_iam_nearest_multi
-    lmbda = 0.03 # termination probability
- r_max = 10 # recursions
- l = 500
- alpha_range = np.linspace(0.5, 0.5, 1)
- k = 20 # k nearest neighbors
-
- # randomly select two molecules
- np.random.seed(1)
- idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
- g1 = Gn[idx_gi[0]]
- g2 = Gn[idx_gi[1]]
- # remove the best 2 graphs.
- del Gn[idx_gi[0]]
- del Gn[idx_gi[1] - 1]
-# del Gn[8]
-
- Gn_mix = [g.copy() for g in Gn]
- Gn_mix.append(g1.copy())
- Gn_mix.append(g2.copy())
-
-
- # compute
- km = compute_kernel(Gn_mix, gkernel, True)
- g_best = []
- dis_best = []
- # for each alpha
- for alpha in alpha_range:
- print('alpha =', alpha)
- dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
- range(len(Gn), len(Gn) + 2), km,
- k, r_max, gkernel)
- dis_best.append(dhat)
- g_best.append(ghat_list)
-
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_best[idx])
- print('the corresponding pre-images are')
- for g in g_best[idx]:
- draw_Letter_graph(g)
-# nx.draw_networkx(g)
-# plt.show()
- print(g.nodes(data=True))
- print(g.edges(data=True))
-
-
-###############################################################################
-# Tests on dataset Letter-H.
-
-def test_gkiam_letter_h():
- from gk_iam import gk_iam_nearest_multi
- ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
- 'extra_params': {}} # node nsymb
-# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-# 'extra_params': {}} # node nsymb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
- gkernel = 'structuralspkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 3 # recursions
-# alpha_range = np.linspace(0.5, 0.5, 1)
- k = 10 # k nearest neighbors
-
- # classify graphs according to letters.
- idx_dict = get_same_item_indices(y_all)
- time_list = []
- sod_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- for letter in idx_dict:
- print('\n-------------------------------------------------------\n')
- Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
- Gn_mix = Gn_let + [g.copy() for g in Gn_let]
-
- alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
-
- # compute
- time0 = time.time()
- km = compute_kernel(Gn_mix, gkernel, True)
- g_best = []
- dis_best = []
- # for each alpha
- for alpha in alpha_range:
- print('alpha =', alpha)
- dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let,
- Gn_let, [alpha] * len(Gn_let), range(len(Gn_let), len(Gn_mix)),
- km, k, r_max, gkernel, c_ei=1.7, c_er=1.7, c_es=1.7,
- ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
- dis_best.append(dhat)
- g_best.append(ghat_list)
- time_list.append(time.time() - time0)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_best[idx])
- print('the corresponding pre-images are')
- for g in g_best[idx]:
- draw_Letter_graph(g, savepath='results/gk_iam/')
-# nx.draw_networkx(g)
-# plt.show()
- print(g.nodes(data=True))
- print(g.edges(data=True))
-
- # compute the corresponding sod in graph space. (alpha range not considered.)
- sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER',
- ged_method='IPFP', saveGXL='gedlib-letter')
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
- sod_ks_min_list.append(sod_ks)
- nb_updated_list.append(nb_updated)
-
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
- print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
- print('\nnumber of updates for each letter: ', nb_updated_list)
- print('\ntimes:', time_list)
-
-#def compute_letter_median_by_average(Gn):
-# return g_median
-
-
-def test_iam_letter_h():
- from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
- ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
- 'extra_params': {}} # node nsymb
-# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-# 'extra_params': {}} # node nsymb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-
-    lmbda = 0.03 # termination probability
-# alpha_range = np.linspace(0.5, 0.5, 1)
-
- # classify graphs according to letters.
- idx_dict = get_same_item_indices(y_all)
- time_list = []
- sod_list = []
- sod_min_list = []
- for letter in idx_dict:
- Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
-
- alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
-
- # compute
- g_best = []
- dis_best = []
- time0 = time.time()
- # for each alpha
- for alpha in alpha_range:
- print('alpha =', alpha)
- ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
- Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7,
- ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
- dis_best.append(dhat)
- g_best.append(ghat_list)
- time_list.append(time.time() - time0)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_best[idx])
- print('the corresponding pre-images are')
- for g in g_best[idx]:
- draw_Letter_graph(g, savepath='results/iam/')
-# nx.draw_networkx(g)
-# plt.show()
- print(g.nodes(data=True))
- print(g.edges(data=True))
-
- # compute the corresponding sod in kernel space. (alpha range not considered.)
- gkernel = 'structuralspkernel'
- sod_tmp = []
- Gn_mix = g_best[0] + Gn_let
- km = compute_kernel(Gn_mix, gkernel, True)
- for ig, g in tqdm(enumerate(g_best[0]), desc='computing kernel sod', file=sys.stdout):
- dtemp = dis_gstar(ig, range(len(g_best[0]), len(Gn_mix)),
- [alpha_range[0]] * len(Gn_let), km, withterm3=False)
- sod_tmp.append(dtemp)
- sod_list.append(sod_tmp)
- sod_min_list.append(np.min(sod_tmp))
-
-
- print('\nsods in kernel space: ', sod_list)
- print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
- print('\ntimes:', time_list)
-
-
-def test_random_preimage_letter_h():
- from preimage_random import preimage_random
- ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
- 'extra_params': {}} # node nsymb
-# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-# 'extra_params': {}} # node nsymb
- # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-# 'extra_params': {}} # node/edge symb
-# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-# 'extra_params': {}}
-# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-# 'extra_params': {}} # node symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
- gkernel = 'structuralspkernel'
-
-#    lmbda = 0.03 # termination probability
- r_max = 3 # 10 # recursions
- l = 500
-# alpha_range = np.linspace(0.5, 0.5, 1)
- #alpha_range = np.linspace(0.1, 0.9, 9)
- k = 10 # 5 # k nearest neighbors
-
- # classify graphs according to letters.
- idx_dict = get_same_item_indices(y_all)
- time_list = []
- sod_list = []
- sod_min_list = []
- for letter in idx_dict:
- print('\n-------------------------------------------------------\n')
- Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
- Gn_mix = Gn_let + [g.copy() for g in Gn_let]
-
- alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
-
- # compute
- time0 = time.time()
- km = compute_kernel(Gn_mix, gkernel, True)
- g_best = []
- dis_best = []
- # for each alpha
- for alpha in alpha_range:
- print('alpha =', alpha)
- dhat, ghat_list = preimage_random(Gn_let, Gn_let, [alpha] * len(Gn_let),
- range(len(Gn_let), len(Gn_mix)), km,
- k, r_max, gkernel, c_ei=1.7,
- c_er=1.7, c_es=1.7)
- dis_best.append(dhat)
- g_best.append(ghat_list)
- time_list.append(time.time() - time0)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_best[idx])
- print('the corresponding pre-images are')
- for g in g_best[idx]:
- draw_Letter_graph(g, savepath='results/gk_iam/')
-# nx.draw_networkx(g)
-# plt.show()
- print(g.nodes(data=True))
- print(g.edges(data=True))
-
- # compute the corresponding sod in graph space. (alpha range not considered.)
- sod_tmp, _ = ged_median(g_best[0], Gn_let)
- sod_list.append(sod_tmp)
- sod_min_list.append(np.min(sod_tmp))
-
-
- print('\nsods in graph space: ', sod_list)
- print('\nsmallest sod in graph space for each letter: ', sod_min_list)
- print('\ntimes:', time_list)
-
-
-def test_gkiam_mutag():
- from gk_iam import gk_iam_nearest_multi
- ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
- 'extra_params': {}} # node nsymb
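-    # NOTE: despite its name, this test loads the Letter-high dataset, not
-    # MUTAG; the body mirrors test_gkiam_letter_h() above.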
-# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-# 'extra_params': {}} # node nsymb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
- gkernel = 'structuralspkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 3 # recursions
-# alpha_range = np.linspace(0.5, 0.5, 1)
- k = 20 # k nearest neighbors
-
- # classify graphs according to letters.
- idx_dict = get_same_item_indices(y_all)
- time_list = []
- sod_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- for letter in idx_dict:
- print('\n-------------------------------------------------------\n')
- Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
- Gn_mix = Gn_let + [g.copy() for g in Gn_let]
-
- alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
-
- # compute
- time0 = time.time()
- km = compute_kernel(Gn_mix, gkernel, True)
- g_best = []
- dis_best = []
- # for each alpha
- for alpha in alpha_range:
- print('alpha =', alpha)
- dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let),
- range(len(Gn_let), len(Gn_mix)), km,
- k, r_max, gkernel, c_ei=1.7,
- c_er=1.7, c_es=1.7)
- dis_best.append(dhat)
- g_best.append(ghat_list)
- time_list.append(time.time() - time0)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_best[idx])
- print('the corresponding pre-images are')
- for g in g_best[idx]:
- draw_Letter_graph(g, savepath='results/gk_iam/')
-# nx.draw_networkx(g)
-# plt.show()
- print(g.nodes(data=True))
- print(g.edges(data=True))
-
- # compute the corresponding sod in graph space. (alpha range not considered.)
- sod_tmp, _ = ged_median(g_best[0], Gn_let)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
- sod_ks_min_list.append(sod_ks)
- nb_updated_list.append(nb_updated)
-
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
- print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
- print('\nnumber of updates for each letter: ', nb_updated_list)
- print('\ntimes:', time_list)
-
-
-###############################################################################
-# Re-test.
-
-def retest_the_simple_two():
- from gk_iam import gk_iam_nearest_multi
-
- # The two simple graphs.
-# g1 = nx.Graph(name='haha')
-# g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
-# g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
-# g2 = nx.Graph(name='hahaha')
-# g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
-# (3, {'atom': 'O'}), (4, {'atom': 'C'})])
-# g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
-# (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
-
- g1 = nx.Graph(name='haha')
- g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
- (3, {'atom': 'S'}), (4, {'atom': 'S'})])
- g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
- (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
- g2 = nx.Graph(name='hahaha')
- g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
- (3, {'atom': 'O'}), (4, {'atom': 'O'})])
- g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
- (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
-
-# # randomly select two molecules
-# np.random.seed(1)
-# idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
-# g1 = Gn[idx_gi[0]]
-# g2 = Gn[idx_gi[1]]
-# Gn_mix = [g.copy() for g in Gn]
-# Gn_mix.append(g1.copy())
-# Gn_mix.append(g2.copy())
-
- Gn = [g1.copy(), g2.copy()]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 10 # recursions
-# l = 500
- alpha_range = np.linspace(0.5, 0.5, 1)
- k = 2 # k nearest neighbors
- epsilon = 1e-6
-    ged_cost = 'CHEM_1'
-    ged_method = 'IPFP'
-    saveGXL = 'gedlib'
-    c_ei = 1
-    c_er = 1
-    c_es = 1
-
- Gn_mix = Gn + [g1.copy(), g2.copy()]
-
- # compute
- time0 = time.time()
- km = compute_kernel(Gn_mix, gkernel, True)
- time_km = time.time() - time0
-
- time_list = []
- sod_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- g_best = []
- # for each alpha
- for alpha in alpha_range:
- print('\n-------------------------------------------------------\n')
- print('alpha =', alpha)
- time0 = time.time()
- dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
- [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
- gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon,
- ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list.append(time_total)
- sod_ks_min_list.append(dhat)
- g_best.append(ghat_list)
- nb_updated_list.append(nb_updated)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', sod_ks_min_list[idx])
- print('one of the possible corresponding pre-images is')
- nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
- with_labels=True)
- plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
- plt.show()
- print(g_best[idx][0].nodes(data=True))
- print(g_best[idx][0].edges(data=True))
-
-# for g in g_best[idx]:
-# draw_Letter_graph(g, savepath='results/gk_iam/')
-## nx.draw_networkx(g)
-## plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- # compute the corresponding sod in graph space.
- for idx, item in enumerate(alpha_range):
-        sod_tmp, _ = ged_median(g_best[idx], [g1, g2], ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
- print('\nsmallest sod in kernel space for each alpha: ', sod_ks_min_list)
- print('\nnumber of updates for each alpha: ', nb_updated_list)
- print('\ntimes:', time_list)
-
-
-
-if __name__ == '__main__':
-# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-# 'extra_params': {}} # node/edge symb
-# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-# 'extra_params': {}} # node nsymb
-# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-# 'extra_params': {}}
-# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-# 'extra_params': {}} # node symb
-# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:20]
-
-# import networkx.algorithms.isomorphism as iso
-# G1 = nx.MultiDiGraph()
-# G2 = nx.MultiDiGraph()
-# G1.add_nodes_from([1,2,3], fill='red')
-# G2.add_nodes_from([10,20,30,40], fill='red')
-# nx.add_path(G1, [1,2,3,4], weight=3, linewidth=2.5)
-# nx.add_path(G2, [10,20,30,40], weight=3)
-# nm = iso.categorical_node_match('fill', 'red')
-# print(nx.is_isomorphic(G1, G2, node_match=nm))
-#
-# test_new_IAM_allGraph_deleteNodes(Gn)
-# test_will_IAM_give_the_median_graph_we_wanted(Gn)
-# test_who_is_the_closest_in_GED_space(Gn)
-# test_who_is_the_closest_in_kernel_space(Gn)
-
-# test_the_simple_two(Gn, 'untilhpathkernel')
-# test_remove_bests(Gn, 'untilhpathkernel')
-# test_gkiam_letter_h()
-# test_iam_letter_h()
-#    test_random_preimage_letter_h()
-
-###############################################################################
-# retests.
- retest_the_simple_two()
\ No newline at end of file
diff --git a/gklearn/preimage/test_preimage_iam.py b/gklearn/preimage/test_preimage_iam.py
deleted file mode 100644
index 9b05dd9..0000000
--- a/gklearn/preimage/test_preimage_iam.py
+++ /dev/null
@@ -1,620 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Sep 5 15:59:00 2019
-
-@author: ljia
-"""
-
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-#from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices
-from gklearn.preimage.ged import ged_median
-
-from gklearn.preimage.preimage_iam import preimage_iam
-
-
-###############################################################################
-# tests on different values on grid of median-sets and k.
-
-def test_preimage_iam_grid_k_median_nb():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 5 # iteration limit for pre-image.
-# alpha_range = np.linspace(0.5, 0.5, 1)
-# k = 5 # k nearest neighbors
- epsilon = 1e-6
- InitIAMWithAllDk = True
- # parameters for GED function
-    ged_cost = 'CHEM_1'
-    ged_method = 'IPFP'
-    saveGXL = 'gedlib'
-    # parameters for IAM function
-    c_ei = 1
-    c_er = 1
-    c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = True
- connected_iam = False
-
-    # number of graphs; we want to compute the median of these graphs.
- nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
- # number of nearest neighbors.
- k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
-
- # find out all the graphs classified to positive group 1.
- idx_dict = get_same_item_indices(y_all)
- Gn = [Gn[i] for i in idx_dict[1]]
-
-# # compute Gram matrix.
-# time0 = time.time()
-# km = compute_kernel(Gn, gkernel, True)
-# time_km = time.time() - time0
-# # write Gram matrix to file.
-# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- nb_updated_k_list = []
- g_best = []
- for idx_nb, nb_median in enumerate(nb_median_range):
- print('\n-------------------------------------------------------')
- print('number of median graphs =', nb_median)
- random.seed(1)
- idx_rdm = random.sample(range(len(Gn)), nb_median)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-
-# for g in Gn_median:
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-# plt.show()
-# plt.clf()
-
- ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
- km_tmp = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- km[i, j] = km_tmp[i, j]
- km[j, i] = km[i, j]
- for i in range(len(Gn)):
- for j, idx in enumerate(idx_rdm):
- km[i, len(Gn) + j] = km[i, idx]
- km[len(Gn) + j, i] = km[i, idx]
- for i, idx1 in enumerate(idx_rdm):
- for j, idx2 in enumerate(idx_rdm):
- km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
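-        # The loops above grow the precomputed Gram matrix km_tmp from shape
-        # (len(Gn), len(Gn)) to (len(Gn) + nb_median, len(Gn) + nb_median):
-        # the extra rows/columns are copies of the rows/columns of the chosen
-        # median graphs, so no kernel value is recomputed for the duplicates
-        # appended to the candidate set.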
-
- ###################################################################
- alpha_range = [1 / nb_median] * nb_median
-
- time_list.append([])
- dis_ks_min_list.append([])
- sod_gs_list.append([])
- sod_gs_min_list.append([])
- nb_updated_list.append([])
- nb_updated_k_list.append([])
- g_best.append([])
-
- for k in k_range:
- print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
- print('k =', k)
- time0 = time.time()
- dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
- preimage_iam(Gn, Gn_median,
- alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
- gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
- params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
- 'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
- 'removeNodes': removeNodes, 'connected': connected_iam},
- params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
- 'saveGXL': saveGXL})
-
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list[idx_nb].append(time_total)
- print('\nsmallest distance in kernel space: ', dhat)
- dis_ks_min_list[idx_nb].append(dhat)
- g_best[idx_nb].append(ghat_list)
- print('\nnumber of updates of the best graph by IAM: ', nb_updated)
- nb_updated_list[idx_nb].append(nb_updated)
- print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k)
- nb_updated_k_list[idx_nb].append(nb_updated_k)
-
- # show the best graph and save it to file.
- print('the shortest distance is', dhat)
- print('one of the possible corresponding pre-images is')
- nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
- with_labels=True)
- plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
- '_k' + str(k) + '.png', format="PNG")
- # plt.show()
- plt.clf()
- # print(ghat_list[0].nodes(data=True))
- # print(ghat_list[0].edges(data=True))
-
- # compute the corresponding sod in graph space.
- sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list[idx_nb].append(sod_tmp)
- sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
- print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each set of median graphs and k: ',
- sod_gs_min_list)
- print('\nsmallest distance in kernel space for each set of median graphs and k: ',
- dis_ks_min_list)
- print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
- nb_updated_list)
- print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
- nb_updated_k_list)
- print('\ntimes:', time_list)
-
-
-###############################################################################
-# tests on different numbers of median-sets.
-
-def test_preimage_iam_median_nb():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 3 # iteration limit for pre-image.
-# alpha_range = np.linspace(0.5, 0.5, 1)
- k = 5 # k nearest neighbors
- epsilon = 1e-6
- InitIAMWithAllDk = True
- # parameters for IAM function
-# c_vi = 0.037
-# c_vr = 0.038
-# c_vs = 0.075
-# c_ei = 0.001
-# c_er = 0.001
-# c_es = 0.0
- c_vi = 4
- c_vr = 4
- c_vs = 2
- c_ei = 1
- c_er = 1
- c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = True
- connected_iam = False
- # parameters for GED function
-# ged_cost='CHEM_1'
- ged_cost = 'CONSTANT'
- ged_method = 'IPFP'
- edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
- ged_stabilizer = 'min'
- ged_repeat = 50
- params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
- 'edit_cost_constant': edit_cost_constant,
- 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
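-    # With the 'CONSTANT' cost model, edit_cost_constant lists the six costs
-    # in the order [c_vi, c_vr, c_vs, c_ei, c_er, c_es]: vertex insertion,
-    # removal and substitution, then the same three operations for edges
-    # (inferred from the variable names above).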
-
-    # number of graphs; we want to compute the median of these graphs.
-# nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
- nb_median_range = [2]
-
- # find out all the graphs classified to positive group 1.
- idx_dict = get_same_item_indices(y_all)
- Gn = [Gn[i] for i in idx_dict[1]]
-
-# # compute Gram matrix.
-# time0 = time.time()
-# km = compute_kernel(Gn, gkernel, True)
-# time_km = time.time() - time0
-# # write Gram matrix to file.
-# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- nb_updated_k_list = []
- g_best = []
- for nb_median in nb_median_range:
- print('\n-------------------------------------------------------')
- print('number of median graphs =', nb_median)
- random.seed(1)
- idx_rdm = random.sample(range(len(Gn)), nb_median)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-
-# for g in Gn_median:
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-# plt.show()
-# plt.clf()
-
- ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
- km_tmp = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- km[i, j] = km_tmp[i, j]
- km[j, i] = km[i, j]
- for i in range(len(Gn)):
- for j, idx in enumerate(idx_rdm):
- km[i, len(Gn) + j] = km[i, idx]
- km[len(Gn) + j, i] = km[i, idx]
- for i, idx1 in enumerate(idx_rdm):
- for j, idx2 in enumerate(idx_rdm):
- km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-
- ###################################################################
- alpha_range = [1 / nb_median] * nb_median
- time0 = time.time()
- dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
- preimage_iam(Gn, Gn_median,
- alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
- gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
- params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
- 'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
- 'removeNodes': removeNodes, 'connected': connected_iam},
- params_ged=params_ged)
-
- time_total = time.time() - time0 + time_km
- print('\ntime: ', time_total)
- time_list.append(time_total)
- print('\nsmallest distance in kernel space: ', dhat)
- dis_ks_min_list.append(dhat)
- g_best.append(ghat_list)
- print('\nnumber of updates of the best graph: ', nb_updated)
- nb_updated_list.append(nb_updated)
- print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
- nb_updated_k_list.append(nb_updated_k)
-
- # show the best graph and save it to file.
- print('the shortest distance is', dhat)
- print('one of the possible corresponding pre-images is')
- nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
- with_labels=True)
- plt.show()
-# plt.savefig('results/preimage_iam/mutag_median_cs.001_nb' + str(nb_median) +
-# '.png', format="PNG")
- plt.clf()
-# print(ghat_list[0].nodes(data=True))
-# print(ghat_list[0].edges(data=True))
-
- # compute the corresponding sod in graph space.
- sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, params_ged=params_ged)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
- print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each set of median graphs: ',
- dis_ks_min_list)
- print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
- nb_updated_list)
- print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
- nb_updated_k_list)
- print('\ntimes:', time_list)
-
-
-###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
-
-def test_gkiam_2combination_all_pairs():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 10 # iteration limit for pre-image.
- alpha_range = np.linspace(0.5, 0.5, 1)
- k = 5 # k nearest neighbors
- epsilon = 1e-6
- InitIAMWithAllDk = False
- # parameters for GED function
-    ged_cost = 'CHEM_1'
-    ged_method = 'IPFP'
-    saveGXL = 'gedlib'
-    # parameters for IAM function
-    c_ei = 1
-    c_er = 1
-    c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = True
- connected_iam = False
-
- nb_update_mat = np.full((len(Gn), len(Gn)), np.inf)
- # test on each pair of graphs.
-# for idx1 in range(len(Gn) - 1, -1, -1):
-# for idx2 in range(idx1, -1, -1):
- for idx1 in range(187, 188):
- for idx2 in range(167, 168):
- g1 = Gn[idx1].copy()
- g2 = Gn[idx2].copy()
- # Gn[10] = []
- # Gn[10] = []
-
- nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
-            plt.savefig("results/gk_iam/all_pairs/mutag" + str(idx1) + ".png", format="PNG")
- plt.show()
- plt.clf()
- nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
-            plt.savefig("results/gk_iam/all_pairs/mutag" + str(idx2) + ".png", format="PNG")
- plt.show()
- plt.clf()
-
- ###################################################################
-# Gn_mix = [g.copy() for g in Gn]
-# Gn_mix.append(g1.copy())
-# Gn_mix.append(g2.copy())
-#
-# # compute
-# time0 = time.time()
-# km = compute_kernel(Gn_mix, gkernel, True)
-# time_km = time.time() - time0
-#
-# # write Gram matrix to file and read it.
-# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
-
- ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
- km = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- for i in range(len(Gn)):
- km[i, len(Gn)] = km[i, idx1]
- km[i, len(Gn) + 1] = km[i, idx2]
- km[len(Gn), i] = km[i, idx1]
- km[len(Gn) + 1, i] = km[i, idx2]
- km[len(Gn), len(Gn)] = km[idx1, idx1]
- km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
- km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
- km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
-
- ###################################################################
-# # use only the two graphs in median set as candidates.
-# Gn = [g1.copy(), g2.copy()]
-# Gn_mix = Gn + [g1.copy(), g2.copy()]
-# # compute
-# time0 = time.time()
-# km = compute_kernel(Gn_mix, gkernel, True)
-# time_km = time.time() - time0
-
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- nb_updated_k_list = []
- g_best = []
- # for each alpha
- for alpha in alpha_range:
- print('\n-------------------------------------------------------\n')
- print('alpha =', alpha)
- time0 = time.time()
- dhat, ghat_list, sod_ks, nb_updated, nb_updated_k = \
- preimage_iam(Gn, [g1, g2],
- [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
- gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
- params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
- 'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
- 'removeNodes': removeNodes, 'connected': connected_iam},
- params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
- 'saveGXL': saveGXL})
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list.append(time_total)
- dis_ks_min_list.append(dhat)
- g_best.append(ghat_list)
- nb_updated_list.append(nb_updated)
- nb_updated_k_list.append(nb_updated_k)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
- print('one of the possible corresponding pre-images is')
- nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
- with_labels=True)
- plt.savefig('results/gk_iam/mutag' + str(idx1) + '_' + str(idx2)
- + '_alpha' + str(item) + '.png', format="PNG")
-# plt.show()
- plt.clf()
-# print(g_best[idx][0].nodes(data=True))
-# print(g_best[idx][0].edges(data=True))
-
- # for g in g_best[idx]:
- # draw_Letter_graph(g, savepath='results/gk_iam/')
- ## nx.draw_networkx(g)
- ## plt.show()
- # print(g.nodes(data=True))
- # print(g.edges(data=True))
-
- # compute the corresponding sod in graph space.
- for idx, item in enumerate(alpha_range):
-                sod_tmp, _ = ged_median(g_best[idx], [g1, g2], ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
- print('\nnumber of updates of the best graph for each alpha: ',
- nb_updated_list)
- print('\nnumber of updates of the k nearest graphs for each alpha: ',
- nb_updated_k_list)
- print('\ntimes:', time_list)
- nb_update_mat[idx1, idx2] = nb_updated_list[0]
-
- str_fw = 'graphs %d and %d: %d.\n' % (idx1, idx2, nb_updated_list[0])
-            with open('results/gk_iam/all_pairs/nb_updates.txt', 'r+') as file:  # the file must already exist for mode 'r+'
- content = file.read()
- file.seek(0, 0)
- file.write(str_fw + content)
-
-
-
-def test_gkiam_2combination():
- from gk_iam import gk_iam_nearest_multi
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 10 # iteration limit for pre-image.
- alpha_range = np.linspace(0.5, 0.5, 1)
- k = 20 # k nearest neighbors
- epsilon = 1e-6
-    ged_cost = 'CHEM_1'
-    ged_method = 'IPFP'
-    saveGXL = 'gedlib'
-    c_ei = 1
-    c_er = 1
-    c_es = 1
-
- # randomly select two molecules
- np.random.seed(1)
- idx_gi = [10, 11] # np.random.randint(0, len(Gn), 2)
- g1 = Gn[idx_gi[0]].copy()
- g2 = Gn[idx_gi[1]].copy()
-# Gn[10] = []
-# Gn[10] = []
-
-# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
-# plt.savefig("results/random_preimage/mutag10.png", format="PNG")
-# plt.show()
-# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
-# plt.savefig("results/random_preimage/mutag11.png", format="PNG")
-# plt.show()
-
- Gn_mix = [g.copy() for g in Gn]
- Gn_mix.append(g1.copy())
- Gn_mix.append(g2.copy())
-
- # compute
-# time0 = time.time()
-# km = compute_kernel(Gn_mix, gkernel, True)
-# time_km = time.time() - time0
-
- # write Gram matrix to file and read it.
-# np.savez('results/gram_matrix.gm', gm=km, gmtime=time_km)
- gmfile = np.load('results/gram_matrix.gm.npz')
- km = gmfile['gm']
- time_km = gmfile['gmtime']
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- g_best = []
- # for each alpha
- for alpha in alpha_range:
- print('\n-------------------------------------------------------\n')
- print('alpha =', alpha)
- time0 = time.time()
- dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
- [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
- gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon,
- ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list.append(time_total)
- dis_ks_min_list.append(dhat)
- g_best.append(ghat_list)
- nb_updated_list.append(nb_updated)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
- print('one of the possible corresponding pre-images is')
- nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
- with_labels=True)
- plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
- plt.show()
- print(g_best[idx][0].nodes(data=True))
- print(g_best[idx][0].edges(data=True))
-
-# for g in g_best[idx]:
-# draw_Letter_graph(g, savepath='results/gk_iam/')
-## nx.draw_networkx(g)
-## plt.show()
-# print(g.nodes(data=True))
-# print(g.edges(data=True))
-
- # compute the corresponding sod in graph space.
- for idx, item in enumerate(alpha_range):
-        sod_tmp, _ = ged_median(g_best[idx], [g1, g2], ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
- print('\nnumber of updates for each alpha: ', nb_updated_list)
- print('\ntimes:', time_list)
-
-
-###############################################################################
-
-
-if __name__ == '__main__':
-###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
-# test_gkiam_2combination()
-# test_gkiam_2combination_all_pairs()
-
-###############################################################################
-# tests on different numbers of median-sets.
- test_preimage_iam_median_nb()
-
-###############################################################################
-# tests on different values on grid of median-sets and k.
-# test_preimage_iam_grid_k_median_nb()
\ No newline at end of file
diff --git a/gklearn/preimage/test_preimage_mix.py b/gklearn/preimage/test_preimage_mix.py
deleted file mode 100644
index 888de86..0000000
--- a/gklearn/preimage/test_preimage_mix.py
+++ /dev/null
@@ -1,539 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Sep 5 15:59:00 2019
-
-@author: ljia
-"""
-
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-#from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.ged import ged_median
-from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges
-from gklearn.preimage.preimage_iam import preimage_iam_random_mix
-
-###############################################################################
-# tests on different values on grid of median-sets and k.
-
-def test_preimage_mix_grid_k_median_nb():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 5 # iteration limit for pre-image.
- l_max = 500 # update limit for random generation
-# alpha_range = np.linspace(0.5, 0.5, 1)
-# k = 5 # k nearest neighbors
- epsilon = 1e-6
- InitIAMWithAllDk = True
- InitRandomWithAllDk = True
- # parameters for GED function
-    ged_cost = 'CHEM_1'
-    ged_method = 'IPFP'
-    saveGXL = 'gedlib'
-    # parameters for IAM function
-    c_ei = 1
-    c_er = 1
-    c_es = 1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = True
- connected_iam = False
-
-    # number of graphs; we want to compute the median of these graphs.
- nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
- # number of nearest neighbors.
- k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
-
- # find out all the graphs classified to positive group 1.
- idx_dict = get_same_item_indices(y_all)
- Gn = [Gn[i] for i in idx_dict[1]]
-
-# # compute Gram matrix.
-# time0 = time.time()
-# km = compute_kernel(Gn, gkernel, True)
-# time_km = time.time() - time0
-# # write Gram matrix to file.
-# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list_iam = []
- nb_updated_list_random = []
- nb_updated_k_list_iam = []
- nb_updated_k_list_random = []
- g_best = []
- for idx_nb, nb_median in enumerate(nb_median_range):
- print('\n-------------------------------------------------------')
- print('number of median graphs =', nb_median)
- random.seed(1)
- idx_rdm = random.sample(range(len(Gn)), nb_median)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-
-# for g in Gn_median:
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-# plt.show()
-# plt.clf()
-
- ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
- km_tmp = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- km[i, j] = km_tmp[i, j]
- km[j, i] = km[i, j]
- for i in range(len(Gn)):
- for j, idx in enumerate(idx_rdm):
- km[i, len(Gn) + j] = km[i, idx]
- km[len(Gn) + j, i] = km[i, idx]
- for i, idx1 in enumerate(idx_rdm):
- for j, idx2 in enumerate(idx_rdm):
- km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-
- ###################################################################
- alpha_range = [1 / nb_median] * nb_median
-
- time_list.append([])
- dis_ks_min_list.append([])
- sod_gs_list.append([])
- sod_gs_min_list.append([])
- nb_updated_list_iam.append([])
- nb_updated_list_random.append([])
- nb_updated_k_list_iam.append([])
- nb_updated_k_list_random.append([])
- g_best.append([])
-
- for k in k_range:
- print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
- print('k =', k)
- time0 = time.time()
- dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
- nb_updated_k_iam, nb_updated_k_random = \
- preimage_iam_random_mix(Gn, Gn_median,
- alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
- l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
- InitRandomWithAllDk=InitRandomWithAllDk,
- params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
- 'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
- 'removeNodes': removeNodes, 'connected': connected_iam},
- params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
- 'saveGXL': saveGXL})
-
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list[idx_nb].append(time_total)
- print('\nsmallest distance in kernel space: ', dhat)
- dis_ks_min_list[idx_nb].append(dhat)
- g_best[idx_nb].append(ghat_list)
- print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
- nb_updated_list_iam[idx_nb].append(nb_updated_iam)
- print('\nnumber of updates of the best graph by random generation: ',
- nb_updated_random)
- nb_updated_list_random[idx_nb].append(nb_updated_random)
- print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
- nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam)
- print('\nnumber of updates of k nearest graphs by random generation: ',
- nb_updated_k_random)
- nb_updated_k_list_random[idx_nb].append(nb_updated_k_random)
-
- # show the best graph and save it to file.
- print('the shortest distance is', dhat)
- print('one of the possible corresponding pre-images is')
- nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
- with_labels=True)
- plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
- '_k' + str(k) + '.png', format="PNG")
- # plt.show()
- plt.clf()
- # print(ghat_list[0].nodes(data=True))
- # print(ghat_list[0].edges(data=True))
-
- # compute the corresponding sod in graph space.
- sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list[idx_nb].append(sod_tmp)
- sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
- print('\nsmallest sod in graph space: ', np.min(sod_tmp))
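The SOD (sum of distances) computed here is the graph-space counterpart of dhat: the total edit distance from the returned pre-image to every graph in the median set. In sketch form (assuming a pairwise GED function is available; the original obtains it through ged_median):

```python
def sod(candidate, median_set, ged):
    # Sum of graph edit distances from one candidate median to the set.
    return sum(ged(candidate, g) for g in median_set)
```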
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each set of median graphs and k: ',
- sod_gs_min_list)
- print('\nsmallest distance in kernel space for each set of median graphs and k: ',
- dis_ks_min_list)
- print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
- nb_updated_list_iam)
- print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ',
- nb_updated_list_random)
- print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
- nb_updated_k_list_iam)
- print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ',
- nb_updated_k_list_random)
- print('\ntimes:', time_list)
-
-
-
-
-###############################################################################
-# tests on different numbers of median-sets.
-
-def test_preimage_mix_median_nb():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 5 # iteration limit for pre-image.
- l_max = 500 # update limit for random generation
-# alpha_range = np.linspace(0.5, 0.5, 1)
- k = 5 # k nearest neighbors
- epsilon = 1e-6
- InitIAMWithAllDk = True
- InitRandomWithAllDk = True
- # parameters for GED function
- ged_cost='CHEM_1'
- ged_method='IPFP'
- saveGXL='gedlib'
- # parameters for IAM function
- c_ei=1
- c_er=1
- c_es=1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = True
- connected_iam = False
-
-    # number of graphs whose median we want to compute.
- nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-
-    # find all the graphs classified into positive group 1.
- idx_dict = get_same_item_indices(y_all)
- Gn = [Gn[i] for i in idx_dict[1]]
-
-# # compute Gram matrix.
-# time0 = time.time()
-# km = compute_kernel(Gn, gkernel, True)
-# time_km = time.time() - time0
-# # write Gram matrix to file.
-# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list_iam = []
- nb_updated_list_random = []
- nb_updated_k_list_iam = []
- nb_updated_k_list_random = []
- g_best = []
- for nb_median in nb_median_range:
- print('\n-------------------------------------------------------')
- print('number of median graphs =', nb_median)
- random.seed(1)
- idx_rdm = random.sample(range(len(Gn)), nb_median)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-
-# for g in Gn_median:
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-# plt.show()
-# plt.clf()
-
- ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
- km_tmp = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- km[i, j] = km_tmp[i, j]
- km[j, i] = km[i, j]
- for i in range(len(Gn)):
- for j, idx in enumerate(idx_rdm):
- km[i, len(Gn) + j] = km[i, idx]
- km[len(Gn) + j, i] = km[i, idx]
- for i, idx1 in enumerate(idx_rdm):
- for j, idx2 in enumerate(idx_rdm):
- km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-
- ###################################################################
- alpha_range = [1 / nb_median] * nb_median
- time0 = time.time()
- dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
- nb_updated_k_iam, nb_updated_k_random = \
- preimage_iam_random_mix(Gn, Gn_median,
- alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
- l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
- InitRandomWithAllDk=InitRandomWithAllDk,
- params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
- 'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
- 'removeNodes': removeNodes, 'connected': connected_iam},
- params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
- 'saveGXL': saveGXL})
-
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list.append(time_total)
- print('\nsmallest distance in kernel space: ', dhat)
- dis_ks_min_list.append(dhat)
- g_best.append(ghat_list)
- print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
- nb_updated_list_iam.append(nb_updated_iam)
- print('\nnumber of updates of the best graph by random generation: ',
- nb_updated_random)
- nb_updated_list_random.append(nb_updated_random)
- print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
- nb_updated_k_list_iam.append(nb_updated_k_iam)
- print('\nnumber of updates of k nearest graphs by random generation: ',
- nb_updated_k_random)
- nb_updated_k_list_random.append(nb_updated_k_random)
-
- # show the best graph and save it to file.
- print('the shortest distance is', dhat)
- print('one of the possible corresponding pre-images is')
- nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
- with_labels=True)
- plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
- '.png', format="PNG")
-# plt.show()
- plt.clf()
-# print(ghat_list[0].nodes(data=True))
-# print(ghat_list[0].edges(data=True))
-
- # compute the corresponding sod in graph space.
- sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
- print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each set of median graphs: ',
- dis_ks_min_list)
- print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
- nb_updated_list_iam)
- print('\nnumber of updates of the best graph for each set of median graphs by random generation: ',
- nb_updated_list_random)
- print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
- nb_updated_k_list_iam)
- print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ',
- nb_updated_k_list_random)
- print('\ntimes:', time_list)
-
-
-
-###############################################################################
-# test on combinations of two randomly chosen graphs (the same setting as in
-# the random pre-image paper).
-
-def test_preimage_mix_2combination_all_pairs():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 10 # iteration limit for pre-image.
- l_max = 500 # update limit for random generation
- alpha_range = np.linspace(0.5, 0.5, 1)
- k = 5 # k nearest neighbors
- epsilon = 1e-6
- InitIAMWithAllDk = True
- InitRandomWithAllDk = True
- # parameters for GED function
- ged_cost='CHEM_1'
- ged_method='IPFP'
- saveGXL='gedlib'
- # parameters for IAM function
- c_ei=1
- c_er=1
- c_es=1
- ite_max_iam = 50
- epsilon_iam = 0.001
- removeNodes = True
- connected_iam = False
-
- nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
- nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
- # test on each pair of graphs.
-# for idx1 in range(len(Gn) - 1, -1, -1):
-# for idx2 in range(idx1, -1, -1):
- for idx1 in range(187, 188):
- for idx2 in range(167, 168):
- g1 = Gn[idx1].copy()
- g2 = Gn[idx2].copy()
- # Gn[10] = []
-
- nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
- plt.savefig("results/preimage_mix/mutag187.png", format="PNG")
- plt.show()
- plt.clf()
- nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
- plt.savefig("results/preimage_mix/mutag167.png", format="PNG")
- plt.show()
- plt.clf()
-
- ###################################################################
-# Gn_mix = [g.copy() for g in Gn]
-# Gn_mix.append(g1.copy())
-# Gn_mix.append(g2.copy())
-#
-# # compute
-# time0 = time.time()
-# km = compute_kernel(Gn_mix, gkernel, True)
-# time_km = time.time() - time0
-#
-# # write Gram matrix to file and read it.
-# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
-
- ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
- km = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- for i in range(len(Gn)):
- km[i, len(Gn)] = km[i, idx1]
- km[i, len(Gn) + 1] = km[i, idx2]
- km[len(Gn), i] = km[i, idx1]
- km[len(Gn) + 1, i] = km[i, idx2]
- km[len(Gn), len(Gn)] = km[idx1, idx1]
- km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
- km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
- km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
-
- ###################################################################
-# # use only the two graphs in median set as candidates.
-# Gn = [g1.copy(), g2.copy()]
-# Gn_mix = Gn + [g1.copy(), g2.copy()]
-# # compute
-# time0 = time.time()
-# km = compute_kernel(Gn_mix, gkernel, True)
-# time_km = time.time() - time0
-
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list_iam = []
- nb_updated_list_random = []
- nb_updated_k_list_iam = []
- nb_updated_k_list_random = []
- g_best = []
- # for each alpha
- for alpha in alpha_range:
- print('\n-------------------------------------------------------\n')
- print('alpha =', alpha)
- time0 = time.time()
- dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
- nb_updated_k_iam, nb_updated_k_random = \
- preimage_iam_random_mix(Gn, [g1, g2],
- [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
- l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
- InitRandomWithAllDk=InitRandomWithAllDk,
- params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
- 'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
- 'removeNodes': removeNodes, 'connected': connected_iam},
- params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
- 'saveGXL': saveGXL})
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list.append(time_total)
- dis_ks_min_list.append(dhat)
- g_best.append(ghat_list)
- nb_updated_list_iam.append(nb_updated_iam)
- nb_updated_list_random.append(nb_updated_random)
- nb_updated_k_list_iam.append(nb_updated_k_iam)
- nb_updated_k_list_random.append(nb_updated_k_random)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
- print('one of the possible corresponding pre-images is')
- nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
- with_labels=True)
- plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
- + '_alpha' + str(item) + '.png', format="PNG")
-# plt.show()
- plt.clf()
-# print(g_best[idx][0].nodes(data=True))
-# print(g_best[idx][0].edges(data=True))
-
- # for g in g_best[idx]:
- # draw_Letter_graph(g, savepath='results/gk_iam/')
- ## nx.draw_networkx(g)
- ## plt.show()
- # print(g.nodes(data=True))
- # print(g.edges(data=True))
-
- # compute the corresponding sod in graph space.
- for idx, item in enumerate(alpha_range):
-            sod_tmp, _ = ged_median([g_best[idx][0]], [g1, g2], ged_cost=ged_cost,
-                                    ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
- print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
- print('\nnumber of updates of the best graph for each alpha by random generation: ',
- nb_updated_list_random)
- print('\nnumber of updates of k nearest graphs for each alpha by IAM: ',
- nb_updated_k_list_iam)
- print('\nnumber of updates of k nearest graphs for each alpha by random generation: ',
- nb_updated_k_list_random)
- print('\ntimes:', time_list)
- nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
- nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
-
- str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
- % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
- with open('results/preimage_mix/nb_updates.txt', 'r+') as file:
- content = file.read()
- file.seek(0, 0)
- file.write(str_fw + content)
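The read/seek/write sequence above prepends the newest summary line to nb_updates.txt; note that mode 'r+' fails if the file does not exist yet. A sketch of the same idiom that also tolerates a missing file (an assumption, not the original behaviour):

```python
import os

def prepend_line(path, line):
    # Read the old content (if any), then rewrite the file with the new
    # line in front, mirroring the file.seek(0, 0) idiom above.
    content = ''
    if os.path.exists(path):
        with open(path) as f:
            content = f.read()
    with open(path, 'w') as f:
        f.write(line + content)
```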
-
-###############################################################################
-
-
-if __name__ == '__main__':
-###############################################################################
-# test on combinations of two randomly chosen graphs (the same setting as in
-# the random pre-image paper).
-# test_preimage_mix_2combination_all_pairs()
-
-###############################################################################
-# tests on different numbers of median-sets.
-# test_preimage_mix_median_nb()
-
-###############################################################################
-# tests on a grid of median-set sizes and k values.
- test_preimage_mix_grid_k_median_nb()
\ No newline at end of file
diff --git a/gklearn/preimage/test_preimage_random.py b/gklearn/preimage/test_preimage_random.py
deleted file mode 100644
index bb77d2f..0000000
--- a/gklearn/preimage/test_preimage_random.py
+++ /dev/null
@@ -1,398 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Sep 5 15:59:00 2019
-
-@author: ljia
-"""
-
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-#from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.preimage_random import preimage_random
-from gklearn.preimage.ged import ged_median
-from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges
-
-
-###############################################################################
-# tests on a grid of median-set sizes and k values.
-
-def test_preimage_random_grid_k_median_nb():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 5 # iteration limit for pre-image.
- l = 500 # update limit for random generation
-# alpha_range = np.linspace(0.5, 0.5, 1)
-# k = 5 # k nearest neighbors
- # parameters for GED function
- ged_cost='CHEM_1'
- ged_method='IPFP'
- saveGXL='gedlib'
-
-    # number of graphs whose median we want to compute.
- nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
- # number of nearest neighbors.
- k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
-
-    # find all the graphs classified into positive group 1.
- idx_dict = get_same_item_indices(y_all)
- Gn = [Gn[i] for i in idx_dict[1]]
-
-# # compute Gram matrix.
-# time0 = time.time()
-# km = compute_kernel(Gn, gkernel, True)
-# time_km = time.time() - time0
-# # write Gram matrix to file.
-# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- g_best = []
- for idx_nb, nb_median in enumerate(nb_median_range):
- print('\n-------------------------------------------------------')
- print('number of median graphs =', nb_median)
- random.seed(1)
- idx_rdm = random.sample(range(len(Gn)), nb_median)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-
-# for g in Gn_median:
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-# plt.show()
-# plt.clf()
-
- ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
- km_tmp = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- km[i, j] = km_tmp[i, j]
- km[j, i] = km[i, j]
- for i in range(len(Gn)):
- for j, idx in enumerate(idx_rdm):
- km[i, len(Gn) + j] = km[i, idx]
- km[len(Gn) + j, i] = km[i, idx]
- for i, idx1 in enumerate(idx_rdm):
- for j, idx2 in enumerate(idx_rdm):
- km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-
- ###################################################################
- alpha_range = [1 / nb_median] * nb_median
-
- time_list.append([])
- dis_ks_min_list.append([])
- sod_gs_list.append([])
- sod_gs_min_list.append([])
- nb_updated_list.append([])
- g_best.append([])
-
- for k in k_range:
- print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
- print('k =', k)
- time0 = time.time()
- dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
- range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel)
-
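For orientation, the random generation bounded by l can be pictured as a hill-descent over random edits. The schematic below is a deliberate simplification for illustration, not the actual preimage_random implementation; g_init, random_edit and dist are assumed callables:

```python
def random_search(g_init, random_edit, dist, l=500):
    # Apply up to l random modifications, accepting only those that reduce
    # the kernel-space distance; the improvement count mirrors nb_updated.
    g_best, d_best, nb_updated = g_init, dist(g_init), 0
    for _ in range(l):
        g_new = random_edit(g_best)
        d_new = dist(g_new)
        if d_new < d_best:
            g_best, d_best, nb_updated = g_new, d_new, nb_updated + 1
    return d_best, g_best, nb_updated
```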
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list[idx_nb].append(time_total)
- print('\nsmallest distance in kernel space: ', dhat)
- dis_ks_min_list[idx_nb].append(dhat)
- g_best[idx_nb].append(ghat)
- print('\nnumber of updates of the best graph: ', nb_updated)
- nb_updated_list[idx_nb].append(nb_updated)
-
- # show the best graph and save it to file.
- print('the shortest distance is', dhat)
- print('one of the possible corresponding pre-images is')
- nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
- with_labels=True)
- plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
- '_k' + str(k) + '.png', format="PNG")
- # plt.show()
- plt.clf()
- # print(ghat_list[0].nodes(data=True))
- # print(ghat_list[0].edges(data=True))
-
- # compute the corresponding sod in graph space.
- sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list[idx_nb].append(sod_tmp)
- sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
- print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each set of median graphs and k: ',
- sod_gs_min_list)
- print('\nsmallest distance in kernel space for each set of median graphs and k: ',
- dis_ks_min_list)
- print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
- nb_updated_list)
- print('\ntimes:', time_list)
-
-
-
-
-###############################################################################
-# tests on different numbers of median-sets.
-
-def test_preimage_random_median_nb():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:50]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-    lmbda = 0.03 # termination probability
- r_max = 5 # iteration limit for pre-image.
- l = 500 # update limit for random generation
-# alpha_range = np.linspace(0.5, 0.5, 1)
- k = 5 # k nearest neighbors
- # parameters for GED function
- ged_cost='CHEM_1'
- ged_method='IPFP'
- saveGXL='gedlib'
-
-    # number of graphs whose median we want to compute.
- nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-
-    # find all the graphs classified into positive group 1.
- idx_dict = get_same_item_indices(y_all)
- Gn = [Gn[i] for i in idx_dict[1]]
-
-# # compute Gram matrix.
-# time0 = time.time()
-# km = compute_kernel(Gn, gkernel, True)
-# time_km = time.time() - time0
-# # write Gram matrix to file.
-# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-
-
- time_list = []
- dis_ks_min_list = []
- sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- g_best = []
- for nb_median in nb_median_range:
- print('\n-------------------------------------------------------')
- print('number of median graphs =', nb_median)
- random.seed(1)
- idx_rdm = random.sample(range(len(Gn)), nb_median)
- print('graphs chosen:', idx_rdm)
- Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-
-# for g in Gn_median:
-# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-# plt.show()
-# plt.clf()
-
- ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
- km_tmp = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- km[i, j] = km_tmp[i, j]
- km[j, i] = km[i, j]
- for i in range(len(Gn)):
- for j, idx in enumerate(idx_rdm):
- km[i, len(Gn) + j] = km[i, idx]
- km[len(Gn) + j, i] = km[i, idx]
- for i, idx1 in enumerate(idx_rdm):
- for j, idx2 in enumerate(idx_rdm):
- km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-
- ###################################################################
- alpha_range = [1 / nb_median] * nb_median
- time0 = time.time()
- dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
- range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel)
-
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list.append(time_total)
- print('\nsmallest distance in kernel space: ', dhat)
- dis_ks_min_list.append(dhat)
- g_best.append(ghat)
- print('\nnumber of updates of the best graph: ', nb_updated)
- nb_updated_list.append(nb_updated)
-
- # show the best graph and save it to file.
- print('the shortest distance is', dhat)
- print('one of the possible corresponding pre-images is')
- nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
- with_labels=True)
- plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
- '.png', format="PNG")
-# plt.show()
- plt.clf()
-# print(ghat_list[0].nodes(data=True))
-# print(ghat_list[0].edges(data=True))
-
- # compute the corresponding sod in graph space.
- sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
- print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-
- print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each set of median graphs: ',
- dis_ks_min_list)
- print('\nnumber of updates of the best graph for each set of median graphs: ',
- nb_updated_list)
- print('\ntimes:', time_list)
-
-
-
-###############################################################################
-# test on combinations of two randomly chosen graphs (the same setting as in
-# the random pre-image paper).
-
-def test_random_preimage_2combination():
- ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- 'extra_params': {}} # node/edge symb
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-# Gn = Gn[0:12]
- remove_edges(Gn)
- gkernel = 'marginalizedkernel'
-
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel)
-# print(dis_max, dis_min, dis_mean)
-
-    lmbda = 0.03 # termination probability
- r_max = 10 # iteration limit for pre-image.
-    l = 500 # update limit for random generation
- alpha_range = np.linspace(0, 1, 11)
- k = 5 # k nearest neighbors
-
- # randomly select two molecules
- np.random.seed(1)
- idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2)
- g1 = Gn[idx_gi[0]].copy()
- g2 = Gn[idx_gi[1]].copy()
-
-# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
-# plt.savefig("results/random_preimage/mutag10.png", format="PNG")
-# plt.show()
-# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
-# plt.savefig("results/random_preimage/mutag11.png", format="PNG")
-# plt.show()
-
- ######################################################################
-# Gn_mix = [g.copy() for g in Gn]
-# Gn_mix.append(g1.copy())
-# Gn_mix.append(g2.copy())
-#
-## g_tmp = iam([g1, g2])
-## nx.draw_networkx(g_tmp)
-## plt.show()
-#
-# # compute
-# time0 = time.time()
-# km = compute_kernel(Gn_mix, gkernel, True)
-# time_km = time.time() - time0
-
- ###################################################################
- idx1 = idx_gi[0]
- idx2 = idx_gi[1]
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
- km = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- for i in range(len(Gn)):
- km[i, len(Gn)] = km[i, idx1]
- km[i, len(Gn) + 1] = km[i, idx2]
- km[len(Gn), i] = km[i, idx1]
- km[len(Gn) + 1, i] = km[i, idx2]
- km[len(Gn), len(Gn)] = km[idx1, idx1]
- km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
- km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
- km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
-
- ###################################################################
-
- time_list = []
- nb_updated_list = []
- g_best = []
- dis_ks_min_list = []
- # for each alpha
- for alpha in alpha_range:
- print('\n-------------------------------------------------------\n')
- print('alpha =', alpha)
- time0 = time.time()
- dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha],
- range(len(Gn), len(Gn) + 2), km,
- k, r_max, l, gkernel)
- time_total = time.time() - time0 + time_km
- print('time: ', time_total)
- time_list.append(time_total)
- dis_ks_min_list.append(dhat)
- g_best.append(ghat)
- nb_updated_list.append(nb_updated)
-
- # show best graphs and save them to file.
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
- print('one of the possible corresponding pre-images is')
- nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
- with_labels=True)
-        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
-        plt.show()  # show after saving; show() may clear the current figure
- plt.clf()
- print(g_best[idx].nodes(data=True))
- print(g_best[idx].edges(data=True))
-
-# # compute the corresponding sod in graph space. (alpha range not considered.)
-# sod_tmp, _ = median_distance(g_best[0], Gn_let)
-# sod_gs_list.append(sod_tmp)
-# sod_gs_min_list.append(np.min(sod_tmp))
-# sod_ks_min_list.append(sod_ks)
-# nb_updated_list.append(nb_updated)
-
-# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
- print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
- print('\nnumber of updates for each alpha: ', nb_updated_list)
- print('\ntimes:', time_list)
-
-###############################################################################
-
-
-if __name__ == '__main__':
-###############################################################################
-# test on combinations of two randomly chosen graphs (the same setting as in
-# the random pre-image paper).
-# test_random_preimage_2combination()
-
-###############################################################################
-# tests all algorithms on different numbers of median-sets.
- test_preimage_random_median_nb()
-
-###############################################################################
-# tests all algorithms on a grid of median-set sizes and k values.
-# test_preimage_random_grid_k_median_nb()
\ No newline at end of file
diff --git a/gklearn/preimage/xp_fit_method.py b/gklearn/preimage/xp_fit_method.py
deleted file mode 100644
index ead2786..0000000
--- a/gklearn/preimage/xp_fit_method.py
+++ /dev/null
@@ -1,935 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jan 14 15:39:29 2020
-
-@author: ljia
-"""
-import numpy as np
-import random
-import csv
-from shutil import copyfile
-import networkx as nx
-import matplotlib.pyplot as plt
-import os
-import time
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
-from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel
-from gklearn.preimage.find_best_k import getRelations
-
-
-def get_dataset(ds_name):
- if ds_name == 'Letter-high': # node non-symb
- dataset = 'cpp_ext/data/collections/Letter.xml'
- graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
- Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
- for G in Gn:
- reform_attributes(G, na_names=['x', 'y'])
- G.graph['node_labels'] = []
- G.graph['edge_labels'] = []
- G.graph['node_attrs'] = ['x', 'y']
- G.graph['edge_attrs'] = []
- elif ds_name == 'Letter-med': # node non-symb
- dataset = 'cpp_ext/data/collections/Letter.xml'
- graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/MED/'
- Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
- for G in Gn:
- reform_attributes(G, na_names=['x', 'y'])
- G.graph['node_labels'] = []
- G.graph['edge_labels'] = []
- G.graph['node_attrs'] = ['x', 'y']
- G.graph['edge_attrs'] = []
- elif ds_name == 'Letter-low': # node non-symb
- dataset = 'cpp_ext/data/collections/Letter.xml'
- graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/LOW/'
- Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
- for G in Gn:
- reform_attributes(G, na_names=['x', 'y'])
- G.graph['node_labels'] = []
- G.graph['edge_labels'] = []
- G.graph['node_attrs'] = ['x', 'y']
- G.graph['edge_attrs'] = []
- elif ds_name == 'Fingerprint':
-# dataset = 'cpp_ext/data/collections/Fingerprint.xml'
-# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
-# Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
-# for G in Gn:
-# reform_attributes(G)
- dataset = '../../datasets/Fingerprint/Fingerprint_A.txt'
- graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
- Gn, y_all = loadDataset(dataset)
- elif ds_name == 'SYNTHETIC':
- pass
- elif ds_name == 'SYNTHETICnew':
- dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
- graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew'
-# dataset = '../../datasets/Letter-high/Letter-high_A.txt'
-# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
- Gn, y_all = loadDataset(dataset)
- elif ds_name == 'Synthie':
- pass
- elif ds_name == 'COIL-DEL':
- dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
- graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/COIL-DEL/'
- Gn, y_all = loadDataset(dataset)
- elif ds_name == 'COIL-RAG':
- pass
- elif ds_name == 'COLORS-3':
- pass
- elif ds_name == 'FRANKENSTEIN':
- pass
-
- return Gn, y_all, graph_dir
-
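The three Letter-* branches above differ only in the HIGH/MED/LOW subdirectory. A parametrized helper (a possible refactoring, not part of the original module) would remove the duplication:

```python
import os
from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.test_k_closest_graphs import reform_attributes

def get_letter_dataset(level):
    # level is one of 'HIGH', 'MED', 'LOW'.
    dataset = 'cpp_ext/data/collections/Letter.xml'
    graph_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'cpp_ext/data/datasets/Letter', level)
    Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
    for G in Gn:
        reform_attributes(G, na_names=['x', 'y'])
        G.graph['node_labels'], G.graph['edge_labels'] = [], []
        G.graph['node_attrs'], G.graph['edge_attrs'] = ['x', 'y'], []
    return Gn, y_all, graph_dir
```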
-
-def init_output_file(ds_name, gkernel, fit_method, dir_output):
-# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
- fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'edit cost',
- 'GED method', 'attr distance', 'fit method', 'k',
- 'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', 'fitting time', 'generating time', 'total time',
- 'median set'])
- f_detail.close()
-
-# fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
- fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'edit cost',
- 'GED method', 'attr distance', 'fit method', 'k',
- 'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', 'fitting time', 'generating time', 'total time',
- '# SOD SM -> GM', '# dis_k SM -> GM',
- '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
- 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
- 'repeats better dis_k gi -> GM'])
- f_summary.close()
-
- return fn_output_detail, fn_output_summary
-
-
-def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_solutions=1,
- Gn_data=None, k_dis_data=None, Kmatrix=None,
- is_separate=False):
-
- # 1. set parameters.
- print('1. setting parameters...')
- ds_name = parameters['ds_name']
- gkernel = parameters['gkernel']
- edit_cost_name = parameters['edit_cost_name']
- ged_method = parameters['ged_method']
- attr_distance = parameters['attr_distance']
- fit_method = parameters['fit_method']
- init_ecc = parameters['init_ecc']
-
- node_label = None
- edge_label = None
- dir_output = 'results/xp_fit_method/'
-
-
- # 2. get dataset.
- print('2. getting dataset...')
- if Gn_data is None:
- Gn, y_all, graph_dir = get_dataset(ds_name)
- else:
- Gn = Gn_data[0]
- y_all = Gn_data[1]
- graph_dir = Gn_data[2]
-
-
- # 3. compute kernel distance matrix.
- print('3. computing kernel distance matrix...')
- if k_dis_data is None:
- dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None,
- None, Kmatrix=Kmatrix, gkernel=gkernel)
- else:
-# dis_mat = k_dis_data[0]
-# dis_max = k_dis_data[1]
-# dis_min = k_dis_data[2]
-# dis_mean = k_dis_data[3]
-# print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min, dis_mean)
- pass
-
-
- if save_results:
- # create result files.
- print('creating output files...')
- fn_output_detail, fn_output_summary = init_output_file(ds_name, gkernel,
- fit_method, dir_output)
-
-
- # start repeats.
- repeats = 1
-# k_list = range(2, 11)
- k_list = [0]
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
- random.seed(1)
- rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-
- for k in k_list:
-# print('\n--------- k =', k, '----------')
-
- sod_sm_mean_list = []
- sod_gm_mean_list = []
- dis_k_sm_mean_list = []
- dis_k_gm_mean_list = []
- dis_k_gi_min_mean_list = []
- time_fitting_mean_list = []
- time_generating_mean_list = []
- time_total_mean_list = []
-
-        # 4. start generating and computing over targets.
-        print('4. starting generation and computation over targets...')
- for i, (y, values) in enumerate(y_idx.items()):
-# y = 'I'
-# values = y_idx[y]
-# values = values[0:10]
- print('\ny =', y)
-# if y.strip() == 'A':
-# continue
-
- k = len(values)
- print('\n--------- k =', k, '----------')
-
- if k < 2:
- print('\nk = ', k, ', skip.\n')
- continue
-
- sod_sm_list = []
- sod_gm_list = []
- dis_k_sm_list = []
- dis_k_gm_list = []
- dis_k_gi_min_list = []
- time_fitting_list = []
- time_generating_list = []
- time_total_list = []
- nb_sod_sm2gm = [0, 0, 0]
- nb_dis_k_sm2gm = [0, 0, 0]
- nb_dis_k_gi2sm = [0, 0, 0]
- nb_dis_k_gi2gm = [0, 0, 0]
- repeats_better_sod_sm2gm = []
- repeats_better_dis_k_sm2gm = []
- repeats_better_dis_k_gi2sm = []
- repeats_better_dis_k_gi2gm = []
-
- # get Gram matrix for this part of data.
- if Kmatrix is not None:
- if is_separate:
- Kmatrix_sub = Kmatrix[i].copy()
- else:
- Kmatrix_sub = Kmatrix[values,:]
- Kmatrix_sub = Kmatrix_sub[:,values]
- else:
- Kmatrix_sub = None
-
- for repeat in range(repeats):
- print('\nrepeat =', repeat)
- random.seed(rdn_seed_list[repeat])
- median_set_idx_idx = random.sample(range(0, len(values)), k)
- median_set_idx = [values[idx] for idx in median_set_idx_idx]
- print('median set: ', median_set_idx)
- Gn_median = [Gn[g] for g in values]
-# from notebooks.utils.plot_all_graphs import draw_Fingerprint_graph
-# for Gn in Gn_median:
-# draw_Fingerprint_graph(Gn, save=None)
-
- # GENERATING & COMPUTING!!
- res_sods, res_dis_ks, res_times = median_on_k_closest_graphs(Gn_median,
- node_label, edge_label,
- gkernel, k, fit_method=fit_method, graph_dir=graph_dir,
- edit_cost_constants=None, group_min=median_set_idx_idx,
- dataset=ds_name, initial_solutions=initial_solutions,
- edit_cost_name=edit_cost_name, init_ecc=init_ecc,
- Kmatrix=Kmatrix_sub, parallel=False)
- sod_sm = res_sods[0]
- sod_gm = res_sods[1]
- dis_k_sm = res_dis_ks[0]
- dis_k_gm = res_dis_ks[1]
- dis_k_gi = res_dis_ks[2]
- dis_k_gi_min = res_dis_ks[3]
- idx_dis_k_gi_min = res_dis_ks[4]
- time_fitting = res_times[0]
- time_generating = res_times[1]
-
- # write result detail.
- sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
- dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
- dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
- dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
- if save_results:
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow([ds_name, gkernel,
- edit_cost_name, ged_method, attr_distance,
- fit_method, k, y, repeat,
- sod_sm, sod_gm, dis_k_sm, dis_k_gm,
- dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
- dis_k_gi2gm, time_fitting, time_generating,
- time_fitting + time_generating, median_set_idx])
- f_detail.close()
-
- # compute result summary.
- sod_sm_list.append(sod_sm)
- sod_gm_list.append(sod_gm)
- dis_k_sm_list.append(dis_k_sm)
- dis_k_gm_list.append(dis_k_gm)
- dis_k_gi_min_list.append(dis_k_gi_min)
- time_fitting_list.append(time_fitting)
- time_generating_list.append(time_generating)
- time_total_list.append(time_fitting + time_generating)
- # # SOD SM -> GM
- if sod_sm > sod_gm:
- nb_sod_sm2gm[0] += 1
- repeats_better_sod_sm2gm.append(repeat)
- elif sod_sm == sod_gm:
- nb_sod_sm2gm[1] += 1
- elif sod_sm < sod_gm:
- nb_sod_sm2gm[2] += 1
- # # dis_k SM -> GM
- if dis_k_sm > dis_k_gm:
- nb_dis_k_sm2gm[0] += 1
- repeats_better_dis_k_sm2gm.append(repeat)
- elif dis_k_sm == dis_k_gm:
- nb_dis_k_sm2gm[1] += 1
- elif dis_k_sm < dis_k_gm:
- nb_dis_k_sm2gm[2] += 1
- # # dis_k gi -> SM
- if dis_k_gi_min > dis_k_sm:
- nb_dis_k_gi2sm[0] += 1
- repeats_better_dis_k_gi2sm.append(repeat)
- elif dis_k_gi_min == dis_k_sm:
- nb_dis_k_gi2sm[1] += 1
- elif dis_k_gi_min < dis_k_sm:
- nb_dis_k_gi2sm[2] += 1
- # # dis_k gi -> GM
- if dis_k_gi_min > dis_k_gm:
- nb_dis_k_gi2gm[0] += 1
- repeats_better_dis_k_gi2gm.append(repeat)
- elif dis_k_gi_min == dis_k_gm:
- nb_dis_k_gi2gm[1] += 1
- elif dis_k_gi_min < dis_k_gm:
- nb_dis_k_gi2gm[2] += 1
-
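Each nb_* triple above counts [better, equal, worse] outcomes of one pairwise comparison, and the four blocks follow a single pattern. Condensed here for clarity (an illustrative helper, not original code):

```python
import numpy as np

def tally(first, second, counts, repeats_better, repeat):
    # counts is a 3-element list: [first > second, equal, first < second];
    # repeats_better records which repeats improved.
    s = np.sign(first - second)
    if s > 0:
        counts[0] += 1
        repeats_better.append(repeat)
    elif s == 0:
        counts[1] += 1
    else:
        counts[2] += 1
```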
- # save median graphs.
- fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
- fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
- + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
- copyfile(fname_sm, fn_pre_sm_new + '.gxl')
- fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
- fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
- + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
- copyfile(fname_gm, fn_pre_gm_new + '.gxl')
- G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
-# reform_attributes(G_best_kernel)
- fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
- + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
- saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='default')
-
- # plot median graphs.
-                if ds_name in ('Letter-high', 'Letter-med', 'Letter-low'):
- set_median = loadGXL(fn_pre_sm_new + '.gxl')
- gen_median = loadGXL(fn_pre_gm_new + '.gxl')
- draw_Letter_graph(set_median, fn_pre_sm_new)
- draw_Letter_graph(gen_median, fn_pre_gm_new)
- draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
-
-            # write result summary for each class.
- sod_sm_mean_list.append(np.mean(sod_sm_list))
- sod_gm_mean_list.append(np.mean(sod_gm_list))
- dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
- dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
- dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
- time_fitting_mean_list.append(np.mean(time_fitting_list))
- time_generating_mean_list.append(np.mean(time_generating_list))
- time_total_mean_list.append(np.mean(time_total_list))
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
- if save_results:
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel,
- edit_cost_name, ged_method, attr_distance,
- fit_method, k, y,
- sod_sm_mean_list[-1], sod_gm_mean_list[-1],
- dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
- dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean,
- time_fitting_mean_list[-1], time_generating_mean_list[-1],
- time_total_mean_list[-1], nb_sod_sm2gm,
- nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
- repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
- repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
- f_summary.close()
-
-
-        # write overall result summary across all classes.
-        sod_sm_mean = np.mean(sod_sm_mean_list)
-        sod_gm_mean = np.mean(sod_gm_mean_list)
-        dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
-        dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
-        dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)
-        time_fitting_mean = np.mean(time_fitting_mean_list)
-        time_generating_mean = np.mean(time_generating_mean_list)
-        time_total_mean = np.mean(time_total_mean_list)
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
- if save_results:
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel,
- edit_cost_name, ged_method, attr_distance,
- fit_method, k, 'all',
- sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
- dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean,
- time_fitting_mean, time_generating_mean, time_total_mean])
- f_summary.close()
-
- print('\ncomplete.')
-
-
-# Draw the current median graph.
-def draw_Letter_graph(graph, file_prefix):
- plt.figure()
- pos = {}
- for n in graph.nodes:
-        pos[n] = np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])  # Graph.node was removed in NetworkX 2.4; use graph.nodes
- nx.draw_networkx(graph, pos)
- plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
-# plt.show()
- plt.clf()
-
-
-def compute_gm_for_each_class(Gn, y_all, ds_name, gkernel, parallel='imap_unordered', is_separate=True):
-    # ds_name is needed below to name the saved Gram-matrix files.
-
- if is_separate:
- print('the Gram matrix is computed for each class.')
- y_idx = get_same_item_indices(y_all)
- Kmatrix = []
- run_time = []
- k_dis_data = []
- for i, (y, values) in enumerate(y_idx.items()):
-            print('class %d:' % i)
- Gn_i = [Gn[val] for val in values]
- time0 = time.time()
- Kmatrix.append(compute_kernel(Gn_i, gkernel, None, None, True, parallel=parallel))
- run_time.append(time.time() - time0)
- k_dis_data.append(kernel_distance_matrix(Gn_i, None, None,
- Kmatrix=Kmatrix[i], gkernel=gkernel, verbose=True))
- np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
- Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
- dis_max = np.max([item[1] for item in k_dis_data])
- dis_min = np.min([item[2] for item in k_dis_data])
- dis_mean = np.mean([item[3] for item in k_dis_data])
- print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min,
- dis_mean)
-
- else:
- time0 = time.time()
- Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel=parallel)
- run_time = time.time() - time0
- np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
- Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
- k_dis_data = kernel_distance_matrix(Gn, None, None,
- Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
- print('the Gram matrix is computed for the whole dataset.')
- print('pair distances - dis_max, dis_min, dis_mean:', k_dis_data[1],
- k_dis_data[2], k_dis_data[3])
-
- print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]
- return Kmatrix, run_time, k_dis_data
-
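A hypothetical call (the argument values are examples only, not taken from the original script), reflecting the ds_name parameter in the signature above:

```python
# One Gram matrix per class when is_separate=True; a single matrix for the
# whole dataset otherwise.
Kmatrix, run_time, k_dis_data = compute_gm_for_each_class(
    Gn, y_all, 'Letter-high', 'structuralspkernel',
    parallel='imap_unordered', is_separate=True)
```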
-
-if __name__ == "__main__":
-# #### xp 1: Letter-high, spkernel.
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'Letter-high'
-# gkernel = 'spkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-# # remove graphs without edges.
-# Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
-# idx = [G[0] for G in Gn]
-# Gn = [G[1] for G in Gn]
-# y_all = [y_all[i] for i in idx]
-## Gn = Gn[0:50]
-## y_all = y_all[0:50]
-# # compute pair distances.
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=None, gkernel=gkernel, verbose=True)
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-# # fitting and computing.
-# fit_methods = ['random', 'expert', 'k-graphs']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'LETTER2',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method}
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=40,
-# Gn_data = [Gn, y_all, graph_dir],
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
-
-
-# #### xp 2: Letter-high, sspkernel.
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'Letter-high'
-# gkernel = 'structuralspkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-## Gn = Gn[0:50]
-## y_all = y_all[0:50]
-# # compute pair distances.
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=None, gkernel=gkernel, verbose=True)
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-# # fitting and computing.
-# fit_methods = ['random', 'expert', 'k-graphs']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'LETTER2',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method}
-# print('parameters: ', parameters)
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=40,
-# Gn_data = [Gn, y_all, graph_dir],
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
-
-
-# #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC.
-# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz')
-# Kmatrix = gmfile['Kmatrix']
-# run_time = gmfile['run_time']
-# # normalization
-# Kmatrix_diag = Kmatrix.diagonal().copy()
-# for i in range(len(Kmatrix)):
-# for j in range(i, len(Kmatrix)):
-# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-# Kmatrix[j][i] = Kmatrix[i][j]
-## np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
-## Kmatrix=Kmatrix, run_time=run_time)
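The commented-out loops above perform cosine normalization, K[i, j] /= sqrt(K[i, i] * K[j, j]), so the Gram matrix's diagonal becomes 1. A vectorized equivalent (`normalize_gram` is an illustrative name, not part of the module):

```python
import numpy as np

def normalize_gram(K):
    # Cosine-normalize a symmetric Gram matrix: unit self-similarities.
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d)
```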
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'SYNTHETICnew'
-# gkernel = 'structuralspkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-# # remove graphs without nodes and edges.
-# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
-# and nx.number_of_edges(G) != 0)]
-# idx = [G[0] for G in Gn]
-# Gn = [G[1] for G in Gn]
-# y_all = [y_all[i] for i in idx]
-## Gn = Gn[0:10]
-## y_all = y_all[0:10]
-# for G in Gn:
-# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
-# # compute pair distances.
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-# # fitting and computing.
-# fit_methods = ['k-graphs', 'random', 'random', 'random']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'NON_SYMBOLIC',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method}
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=1,
-# Gn_data = [Gn, y_all, graph_dir],
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-# Kmatrix=Kmatrix)
-
-
-# ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC.
-# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz')
-# Kmatrix = gmfile['Kmatrix']
-# # normalization
-# Kmatrix_diag = Kmatrix.diagonal().copy()
-# for i in range(len(Kmatrix)):
-# for j in range(i, len(Kmatrix)):
-# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-# Kmatrix[j][i] = Kmatrix[i][j]
-# run_time = 21821.35
-# np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
-# Kmatrix=Kmatrix, run_time=run_time)
-#
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'SYNTHETICnew'
-# gkernel = 'spkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-## # remove graphs without nodes and edges.
-## Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_node(G) != 0
-## and nx.number_of_edges(G) != 0)]
-## idx = [G[0] for G in Gn]
-## Gn = [G[1] for G in Gn]
-## y_all = [y_all[i] for i in idx]
-## Gn = Gn[0:5]
-## y_all = y_all[0:5]
-# for G in Gn:
-# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
-#
-# # compute/read Gram matrix and pair distances.
-## Kmatrix = compute_kernel(Gn, gkernel, None, None, True)
-## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-## Kmatrix=Kmatrix)
-# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-# Kmatrix = gmfile['Kmatrix']
-# run_time = gmfile['run_time']
-## Kmatrix = Kmatrix[[0,1,2,3,4],:]
-## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-# print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-## Kmatrix = np.zeros((len(Gn), len(Gn)))
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#
-# # fitting and computing.
-# fit_methods = ['k-graphs', 'random', 'random', 'random']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'NON_SYMBOLIC',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method}
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=1,
-# Gn_data=[Gn, y_all, graph_dir],
-# k_dis_data=[dis_mat, dis_max, dis_min, dis_mean],
-# Kmatrix=Kmatrix)
-
-
-# #### xp 5: Fingerprint, sspkernel, using LETTER2, only node attrs.
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'Fingerprint'
-# gkernel = 'structuralspkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-# # remove graphs without nodes and edges.
-# Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
-## and nx.number_of_edges(G) != 0)]
-# idx = [G[0] for G in Gn]
-# Gn = [G[1] for G in Gn]
-# y_all = [y_all[i] for i in idx]
-# y_idx = get_same_item_indices(y_all)
-# # remove unused labels.
-# for G in Gn:
-# G.graph['edge_attrs'] = []
-# for edge in G.edges:
-# del G.edges[edge]['attributes']
-# del G.edges[edge]['orient']
-# del G.edges[edge]['angle']
-## Gn = Gn[805:815]
-## y_all = y_all[805:815]
-# for G in Gn:
-# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
-#
-# # compute/read Gram matrix and pair distances.
-## Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-## Kmatrix=Kmatrix)
-# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-# Kmatrix = gmfile['Kmatrix']
-## run_time = gmfile['run_time']
-## Kmatrix = Kmatrix[[0,1,2,3,4],:]
-## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-## Kmatrix = np.zeros((len(Gn), len(Gn)))
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#
-# # fitting and computing.
-# fit_methods = ['k-graphs', 'random', 'random', 'random']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'LETTER2',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method,
-# 'init_ecc': [1,1,1,1,1]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=40,
-# Gn_data = [Gn, y_all, graph_dir],
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-# Kmatrix=Kmatrix)
-
-
-# #### xp 6: Letter-med, sspkernel.
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'Letter-med'
-# gkernel = 'structuralspkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-## Gn = Gn[0:50]
-## y_all = y_all[0:50]
-#
-# # compute/read Gram matrix and pair distances.
-# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-# Kmatrix=Kmatrix)
-## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-## Kmatrix = gmfile['Kmatrix']
-## run_time = gmfile['run_time']
-## Kmatrix = Kmatrix[[0,1,2,3,4],:]
-## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-## Kmatrix = np.zeros((len(Gn), len(Gn)))
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#
-# # fitting and computing.
-# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'LETTER2',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method,
-# 'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]}
-# print('parameters: ', parameters)
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=40,
-# Gn_data = [Gn, y_all, graph_dir],
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-# Kmatrix=Kmatrix)
-
-
-# #### xp 7: Letter-low, sspkernel.
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'Letter-low'
-# gkernel = 'structuralspkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-## Gn = Gn[0:50]
-## y_all = y_all[0:50]
-#
-# # compute/read Gram matrix and pair distances.
-# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-# Kmatrix=Kmatrix)
-## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-## Kmatrix = gmfile['Kmatrix']
-## run_time = gmfile['run_time']
-## Kmatrix = Kmatrix[[0,1,2,3,4],:]
-## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-## Kmatrix = np.zeros((len(Gn), len(Gn)))
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#
-# # fitting and computing.
-# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'LETTER2',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method,
-# 'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
-# print('parameters: ', parameters)
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=40,
-# Gn_data = [Gn, y_all, graph_dir],
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-# Kmatrix=Kmatrix)
-
-
-# #### xp 8: Letter-med, spkernel.
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'Letter-med'
-# gkernel = 'spkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-# # remove graphs without nodes and edges.
-# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
-# and nx.number_of_edges(G) != 0)]
-# idx = [G[0] for G in Gn]
-# Gn = [G[1] for G in Gn]
-# y_all = [y_all[i] for i in idx]
-## Gn = Gn[0:50]
-## y_all = y_all[0:50]
-#
-# # compute/read Gram matrix and pair distances.
-# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-# Kmatrix=Kmatrix)
-## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-## Kmatrix = gmfile['Kmatrix']
-## run_time = gmfile['run_time']
-## Kmatrix = Kmatrix[[0,1,2,3,4],:]
-## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-## Kmatrix = np.zeros((len(Gn), len(Gn)))
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#
-# # fitting and computing.
-# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'LETTER2',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method,
-# 'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]}
-# print('parameters: ', parameters)
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=40,
-# Gn_data = [Gn, y_all, graph_dir],
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-# Kmatrix=Kmatrix)
-
-
-# #### xp 9: Letter-low, spkernel.
-# # load dataset.
-# print('getting dataset and computing kernel distance matrix first...')
-# ds_name = 'Letter-low'
-# gkernel = 'spkernel'
-# Gn, y_all, graph_dir = get_dataset(ds_name)
-# # remove graphs without nodes and edges.
-# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
-# and nx.number_of_edges(G) != 0)]
-# idx = [G[0] for G in Gn]
-# Gn = [G[1] for G in Gn]
-# y_all = [y_all[i] for i in idx]
-## Gn = Gn[0:50]
-## y_all = y_all[0:50]
-#
-# # compute/read Gram matrix and pair distances.
-# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-# Kmatrix=Kmatrix)
-## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-## Kmatrix = gmfile['Kmatrix']
-## run_time = gmfile['run_time']
-## Kmatrix = Kmatrix[[0,1,2,3,4],:]
-## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-## Kmatrix = np.zeros((len(Gn), len(Gn)))
-## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#
-# # fitting and computing.
-# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
-# for fit_method in fit_methods:
-# print('\n-------------------------------------')
-# print('fit method:', fit_method)
-# parameters = {'ds_name': ds_name,
-# 'gkernel': gkernel,
-# 'edit_cost_name': 'LETTER2',
-# 'ged_method': 'mIPFP',
-# 'attr_distance': 'euclidean',
-# 'fit_method': fit_method,
-# 'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
-# print('parameters: ', parameters)
-# xp_fit_method_for_non_symbolic(parameters, save_results=True,
-# initial_solutions=40,
-# Gn_data = [Gn, y_all, graph_dir],
-# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-# Kmatrix=Kmatrix)
-
-
- #### xp 5: COIL-DEL, sspkernel, using LETTER2, only node attrs.
- # load dataset.
- print('getting dataset and computing kernel distance matrix first...')
- ds_name = 'COIL-DEL'
- gkernel = 'structuralspkernel'
- Gn, y_all, graph_dir = get_dataset(ds_name)
- # remove graphs without nodes and edges.
- Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
-# and nx.number_of_edges(G) != 0)]
- idx = [G[0] for G in Gn]
- Gn = [G[1] for G in Gn]
- y_all = [y_all[i] for i in idx]
- # remove unused labels.
- for G in Gn:
- G.graph['edge_labels'] = []
- for edge in G.edges:
- del G.edges[edge]['bond_type']
- del G.edges[edge]['valence']
-# Gn = Gn[805:815]
-# y_all = y_all[805:815]
- for G in Gn:
- G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
-
- # compute/read Gram matrix and pair distances.
- is_separate = True
- Kmatrix, run_time, k_dis_data = compute_gm_for_each_class(Gn,
- y_all,
- gkernel,
- parallel='imap_unordered',
- is_separate=is_separate)
-# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-# Kmatrix=Kmatrix)
-# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-# Kmatrix = gmfile['Kmatrix']
-# run_time = gmfile['run_time']
-# Kmatrix = Kmatrix[[0,1,2,3,4],:]
-# Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-# print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-# Kmatrix = np.zeros((len(Gn), len(Gn)))
-# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-
- # fitting and computing.
- fit_methods = ['k-graphs', 'random', 'random', 'random']
- for fit_method in fit_methods:
- print('\n-------------------------------------')
- print('fit method:', fit_method)
- parameters = {'ds_name': ds_name,
- 'gkernel': gkernel,
- 'edit_cost_name': 'LETTER2',
- 'ged_method': 'mIPFP',
- 'attr_distance': 'euclidean',
- 'fit_method': fit_method,
-                  'init_ecc': [3, 3, 1, 3, 3]} # alternative: [0.525, 0.525, 0.001, 0.125, 0.125]
- xp_fit_method_for_non_symbolic(parameters, save_results=True,
- initial_solutions=40,
- Gn_data=[Gn, y_all, graph_dir],
- k_dis_data=k_dis_data,
- Kmatrix=Kmatrix,
- is_separate=is_separate)
\ No newline at end of file
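
Editor's note: throughout these deleted scripts, `kernel_distance_matrix` turns a Gram matrix into pairwise distances in the kernel's feature space, returning `(dis_mat, dis_max, dis_min, dis_mean)`. For readers of this patch, here is a minimal NumPy sketch of that computation; the function name and the return convention merely mirror the deleted code above, and this is not gklearn's actual implementation.

```python
import numpy as np

def kernel_induced_distances(K):
    """Pairwise distances induced by a Gram matrix K:
    d(i, j) = sqrt(K[i, i] + K[j, j] - 2 * K[i, j])."""
    diag = np.diag(K)
    d2 = diag[:, None] + diag[None, :] - 2.0 * K
    d2 = np.maximum(d2, 0.0)  # clamp tiny negatives caused by round-off
    dis_mat = np.sqrt(d2)
    off_diag = dis_mat[~np.eye(len(K), dtype=bool)]  # ignore self-distances
    return dis_mat, off_diag.max(), off_diag.min(), off_diag.mean()

# toy usage on a PSD matrix built from random feature vectors:
X = np.random.default_rng(0).normal(size=(5, 3))
dis_mat, dis_max, dis_min, dis_mean = kernel_induced_distances(X @ X.T)
```
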
diff --git a/gklearn/preimage/xp_letter_h.py b/gklearn/preimage/xp_letter_h.py
deleted file mode 100644
index 1e16fcf..0000000
--- a/gklearn/preimage/xp_letter_h.py
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jan 14 15:39:29 2020
-
-@author: ljia
-"""
-import os  # used below to build graph_dir and GXL output paths
-import numpy as np
-import random
-import csv
-from shutil import copyfile
-import networkx as nx
-import matplotlib.pyplot as plt
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
-from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix
-from gklearn.preimage.find_best_k import getRelations
-
-
-def xp_letter_h_LETTER2_cost():
- ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
-        'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # non-symbolic node attrs (x, y)
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-
- dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel')
- for G in Gn:
- reform_attributes(G)
-# ds = {'name': 'Letter-high',
-# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
-# Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
- gkernel = 'structuralspkernel'
- node_label = None
- edge_label = None
- ds_name = 'letter-h'
- dir_output = 'results/xp_letter_h/'
- save_results = True
- cost = 'LETTER2'
-
- repeats = 1
-# k_list = range(2, 11)
- k_list = [150]
- fit_method = 'k-graphs'
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
-
- if save_results:
- # create result files.
- fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', 'median set'])
- f_detail.close()
- fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
- '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
- 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
- 'repeats better dis_k gi -> GM'])
- f_summary.close()
-
- random.seed(1)
- rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-
- for k in k_list:
- print('\n--------- k =', k, '----------')
-
- sod_sm_mean_list = []
- sod_gm_mean_list = []
- dis_k_sm_mean_list = []
- dis_k_gm_mean_list = []
- dis_k_gi_min_mean_list = []
-# nb_sod_sm2gm = [0, 0, 0]
-# nb_dis_k_sm2gm = [0, 0, 0]
-# nb_dis_k_gi2sm = [0, 0, 0]
-# nb_dis_k_gi2gm = [0, 0, 0]
-# repeats_better_sod_sm2gm = []
-# repeats_better_dis_k_sm2gm = []
-# repeats_better_dis_k_gi2sm = []
-# repeats_better_dis_k_gi2gm = []
-
- for i, (y, values) in enumerate(y_idx.items()):
- print('\ny =', y)
-# y = 'F'
-# values = y_idx[y]
-# values = values[0:10]
-
- k = len(values)
-
- sod_sm_list = []
- sod_gm_list = []
- dis_k_sm_list = []
- dis_k_gm_list = []
- dis_k_gi_min_list = []
- nb_sod_sm2gm = [0, 0, 0]
- nb_dis_k_sm2gm = [0, 0, 0]
- nb_dis_k_gi2sm = [0, 0, 0]
- nb_dis_k_gi2gm = [0, 0, 0]
- repeats_better_sod_sm2gm = []
- repeats_better_dis_k_sm2gm = []
- repeats_better_dis_k_gi2sm = []
- repeats_better_dis_k_gi2gm = []
-
- for repeat in range(repeats):
- print('\nrepeat =', repeat)
- random.seed(rdn_seed_list[repeat])
- median_set_idx_idx = random.sample(range(0, len(values)), k)
- median_set_idx = [values[idx] for idx in median_set_idx_idx]
- print('median set: ', median_set_idx)
- Gn_median = [Gn[g] for g in values]
-
- sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
- = median_on_k_closest_graphs(Gn_median, node_label, edge_label,
- gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
- edit_costs=None, group_min=median_set_idx_idx,
- dataset='Letter', cost=cost, parallel=False)
-
- # write result detail.
- sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
- dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
- dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
- dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
- if save_results:
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
- y, repeat,
- sod_sm, sod_gm, dis_k_sm, dis_k_gm,
- dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
- dis_k_gi2gm, median_set_idx])
- f_detail.close()
-
- # compute result summary.
- sod_sm_list.append(sod_sm)
- sod_gm_list.append(sod_gm)
- dis_k_sm_list.append(dis_k_sm)
- dis_k_gm_list.append(dis_k_gm)
- dis_k_gi_min_list.append(dis_k_gi_min)
- # # SOD SM -> GM
- if sod_sm > sod_gm:
- nb_sod_sm2gm[0] += 1
- repeats_better_sod_sm2gm.append(repeat)
- elif sod_sm == sod_gm:
- nb_sod_sm2gm[1] += 1
- elif sod_sm < sod_gm:
- nb_sod_sm2gm[2] += 1
- # # dis_k SM -> GM
- if dis_k_sm > dis_k_gm:
- nb_dis_k_sm2gm[0] += 1
- repeats_better_dis_k_sm2gm.append(repeat)
- elif dis_k_sm == dis_k_gm:
- nb_dis_k_sm2gm[1] += 1
- elif dis_k_sm < dis_k_gm:
- nb_dis_k_sm2gm[2] += 1
- # # dis_k gi -> SM
- if dis_k_gi_min > dis_k_sm:
- nb_dis_k_gi2sm[0] += 1
- repeats_better_dis_k_gi2sm.append(repeat)
- elif dis_k_gi_min == dis_k_sm:
- nb_dis_k_gi2sm[1] += 1
- elif dis_k_gi_min < dis_k_sm:
- nb_dis_k_gi2sm[2] += 1
- # # dis_k gi -> GM
- if dis_k_gi_min > dis_k_gm:
- nb_dis_k_gi2gm[0] += 1
- repeats_better_dis_k_gi2gm.append(repeat)
- elif dis_k_gi_min == dis_k_gm:
- nb_dis_k_gi2gm[1] += 1
- elif dis_k_gi_min < dis_k_gm:
- nb_dis_k_gi2gm[2] += 1
-
- # save median graphs.
- fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
- fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
- + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
- copyfile(fname_sm, fn_pre_sm_new + '.gxl')
- fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
- fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
- + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
- copyfile(fname_gm, fn_pre_gm_new + '.gxl')
- G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
- reform_attributes(G_best_kernel)
- fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
- + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
- saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
-
- # plot median graphs.
- set_median = loadGXL(fn_pre_sm_new + '.gxl')
- gen_median = loadGXL(fn_pre_gm_new + '.gxl')
- draw_Letter_graph(set_median, fn_pre_sm_new)
- draw_Letter_graph(gen_median, fn_pre_gm_new)
- draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
-
- # write result summary for each letter.
- sod_sm_mean_list.append(np.mean(sod_sm_list))
- sod_gm_mean_list.append(np.mean(sod_gm_list))
- dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
- dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
- dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
- if save_results:
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
- sod_sm_mean_list[-1], sod_gm_mean_list[-1],
- dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
- dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
- nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
- repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
- repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
- f_summary.close()
-
-
- # write overall result summary across all letters.
- sod_sm_mean = np.mean(sod_sm_mean_list)
- sod_gm_mean = np.mean(sod_gm_mean_list)
- dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
- dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
- dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)  # mean over per-letter means, as for the other metrics
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
- if save_results:
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
- sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
- dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean])
- f_summary.close()
-
- print('\ncomplete.')
-
-
-def xp_letter_h():
- ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
-        'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # non-symbolic node attrs (x, y)
- Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
- for G in Gn:
- reform_attributes(G)
-# ds = {'name': 'Letter-high',
-# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
-# Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
- gkernel = 'structuralspkernel'
- node_label = None
- edge_label = None
- ds_name = 'letter-h'
- dir_output = 'results/xp_letter_h/'
- save_results = False
-
- repeats = 1
-# k_list = range(2, 11)
- k_list = [150]
- fit_method = 'k-graphs'
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
-
- if save_results:
- # create result files.
- fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', 'median set'])
- f_detail.close()
- fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
- '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
- 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
- 'repeats better dis_k gi -> GM'])
- f_summary.close()
-
- random.seed(1)
- rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-
- for k in k_list:
- print('\n--------- k =', k, '----------')
-
- sod_sm_mean_list = []
- sod_gm_mean_list = []
- dis_k_sm_mean_list = []
- dis_k_gm_mean_list = []
- dis_k_gi_min_mean_list = []
-# nb_sod_sm2gm = [0, 0, 0]
-# nb_dis_k_sm2gm = [0, 0, 0]
-# nb_dis_k_gi2sm = [0, 0, 0]
-# nb_dis_k_gi2gm = [0, 0, 0]
-# repeats_better_sod_sm2gm = []
-# repeats_better_dis_k_sm2gm = []
-# repeats_better_dis_k_gi2sm = []
-# repeats_better_dis_k_gi2gm = []
-
- for i, (y, values) in enumerate(y_idx.items()):
- print('\ny =', y)
-# y = 'N'
-# values = y_idx[y]
-# values = values[0:10]
-
- k = len(values)
-
- sod_sm_list = []
- sod_gm_list = []
- dis_k_sm_list = []
- dis_k_gm_list = []
- dis_k_gi_min_list = []
- nb_sod_sm2gm = [0, 0, 0]
- nb_dis_k_sm2gm = [0, 0, 0]
- nb_dis_k_gi2sm = [0, 0, 0]
- nb_dis_k_gi2gm = [0, 0, 0]
- repeats_better_sod_sm2gm = []
- repeats_better_dis_k_sm2gm = []
- repeats_better_dis_k_gi2sm = []
- repeats_better_dis_k_gi2gm = []
-
- for repeat in range(repeats):
- print('\nrepeat =', repeat)
- random.seed(rdn_seed_list[repeat])
- median_set_idx_idx = random.sample(range(0, len(values)), k)
- median_set_idx = [values[idx] for idx in median_set_idx_idx]
- print('median set: ', median_set_idx)
- Gn_median = [Gn[g] for g in values]
-
- sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
- = median_on_k_closest_graphs(Gn_median, node_label, edge_label,
- gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
- edit_costs=None, group_min=median_set_idx_idx,
- dataset='Letter', parallel=False)
-
- # write result detail.
- sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
- dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
- dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
- dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
- if save_results:
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
- y, repeat,
- sod_sm, sod_gm, dis_k_sm, dis_k_gm,
- dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
- dis_k_gi2gm, median_set_idx])
- f_detail.close()
-
- # compute result summary.
- sod_sm_list.append(sod_sm)
- sod_gm_list.append(sod_gm)
- dis_k_sm_list.append(dis_k_sm)
- dis_k_gm_list.append(dis_k_gm)
- dis_k_gi_min_list.append(dis_k_gi_min)
- # # SOD SM -> GM
- if sod_sm > sod_gm:
- nb_sod_sm2gm[0] += 1
- repeats_better_sod_sm2gm.append(repeat)
- elif sod_sm == sod_gm:
- nb_sod_sm2gm[1] += 1
- elif sod_sm < sod_gm:
- nb_sod_sm2gm[2] += 1
- # # dis_k SM -> GM
- if dis_k_sm > dis_k_gm:
- nb_dis_k_sm2gm[0] += 1
- repeats_better_dis_k_sm2gm.append(repeat)
- elif dis_k_sm == dis_k_gm:
- nb_dis_k_sm2gm[1] += 1
- elif dis_k_sm < dis_k_gm:
- nb_dis_k_sm2gm[2] += 1
- # # dis_k gi -> SM
- if dis_k_gi_min > dis_k_sm:
- nb_dis_k_gi2sm[0] += 1
- repeats_better_dis_k_gi2sm.append(repeat)
- elif dis_k_gi_min == dis_k_sm:
- nb_dis_k_gi2sm[1] += 1
- elif dis_k_gi_min < dis_k_sm:
- nb_dis_k_gi2sm[2] += 1
- # # dis_k gi -> GM
- if dis_k_gi_min > dis_k_gm:
- nb_dis_k_gi2gm[0] += 1
- repeats_better_dis_k_gi2gm.append(repeat)
- elif dis_k_gi_min == dis_k_gm:
- nb_dis_k_gi2gm[1] += 1
- elif dis_k_gi_min < dis_k_gm:
- nb_dis_k_gi2gm[2] += 1
-
- # save median graphs.
- fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
- fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
- + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
- copyfile(fname_sm, fn_pre_sm_new + '.gxl')
- fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
- fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
- + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
- copyfile(fname_gm, fn_pre_gm_new + '.gxl')
- G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
- reform_attributes(G_best_kernel)
- fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
- + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
- saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
-
- # plot median graphs.
- set_median = loadGXL(fn_pre_sm_new + '.gxl')
- gen_median = loadGXL(fn_pre_gm_new + '.gxl')
- draw_Letter_graph(set_median, fn_pre_sm_new)
- draw_Letter_graph(gen_median, fn_pre_gm_new)
- draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
-
- # write result summary for each letter.
- sod_sm_mean_list.append(np.mean(sod_sm_list))
- sod_gm_mean_list.append(np.mean(sod_gm_list))
- dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
- dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
- dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
- if save_results:
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
- sod_sm_mean_list[-1], sod_gm_mean_list[-1],
- dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
- dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
- nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
- repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
- repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
- f_summary.close()
-
-
- # write overall result summary across all letters.
- sod_sm_mean = np.mean(sod_sm_mean_list)
- sod_gm_mean = np.mean(sod_gm_mean_list)
- dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
- dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
- dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)  # mean over per-letter means, as for the other metrics
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
- if save_results:
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
- sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
- dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean])
- f_summary.close()
-
- print('\ncomplete.')
-
-
-# Draw the current median graph.
-def draw_Letter_graph(graph, file_prefix):
- plt.figure()
- pos = {}
- for n in graph.nodes:
-  pos[n] = np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])  # graph.nodes: NetworkX 2.x API
- nx.draw_networkx(graph, pos)
- plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
-# plt.show()
- plt.close()  # close the figure rather than clearing it, so repeated calls don't accumulate open figures
-
-
-if __name__ == "__main__":
-# xp_letter_h()
- xp_letter_h_LETTER2_cost()
\ No newline at end of file
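
Editor's note: the `getRelations(np.sign(...))` pattern that recurs in these scripts records whether one quantity improved on another (the 'SOD SM -> GM'-style columns). `getRelations` lives in find_best_k.py, which this patch also deletes without showing its body, so the mapping below is an inferred stand-in: judging from the `nb_*` counters, a negative sign means the arrow's target achieved the smaller (better) value.

```python
import numpy as np

def get_relations(sign):
    # hypothetical stand-in for find_best_k.getRelations; the original body
    # is not visible in this patch, so these labels are assumptions.
    if sign == -1:
        return 'better'
    elif sign == 0:
        return 'same'
    else:
        return 'worse'

# e.g. 'SOD SM -> GM': did the generalized median improve on the set median?
sod_sm, sod_gm = 12.5, 11.0
print(get_relations(np.sign(sod_gm - sod_sm)))  # 'better', since sod_gm < sod_sm
```
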
diff --git a/gklearn/preimage/xp_monoterpenoides.py b/gklearn/preimage/xp_monoterpenoides.py
deleted file mode 100644
index 2270471..0000000
--- a/gklearn/preimage/xp_monoterpenoides.py
+++ /dev/null
@@ -1,249 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jan 16 11:03:11 2020
-
-@author: ljia
-"""
-
-import os
-import numpy as np
-import random
-import csv
-from shutil import copyfile
-import networkx as nx
-import matplotlib.pyplot as plt
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
-from gklearn.preimage.utils import get_same_item_indices
-from gklearn.preimage.find_best_k import getRelations
-
-def xp_monoterpenoides():
- ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
-       'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb; note the leading '/', since dirname() has no trailing separator
- Gn, y_all = loadDataset(ds['dataset'])
-# ds = {'name': 'Letter-high',
-# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
-# Gn, y_all = loadDataset(ds['dataset'])
-# Gn = Gn[0:50]
- gkernel = 'treeletkernel'
- node_label = 'atom'
- edge_label = 'bond_type'
- ds_name = 'monoterpenoides'
- dir_output = 'results/xp_monoterpenoides/'
-
- repeats = 1
-# k_list = range(2, 11)
- k_list = [0]
- fit_method = 'k-graphs'
- # get indices by classes.
- y_idx = get_same_item_indices(y_all)
-
- # create result files.
- fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', 'median set'])
- f_detail.close()
- fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
- 'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
- 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
- 'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
- '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
- 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
- 'repeats better dis_k gi -> GM'])
- f_summary.close()
-
- random.seed(1)
- rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-
- for k in k_list:
- print('\n--------- k =', k, '----------')
-
- sod_sm_mean_list = []
- sod_gm_mean_list = []
- dis_k_sm_mean_list = []
- dis_k_gm_mean_list = []
- dis_k_gi_min_mean_list = []
-# nb_sod_sm2gm = [0, 0, 0]
-# nb_dis_k_sm2gm = [0, 0, 0]
-# nb_dis_k_gi2sm = [0, 0, 0]
-# nb_dis_k_gi2gm = [0, 0, 0]
-# repeats_better_sod_sm2gm = []
-# repeats_better_dis_k_sm2gm = []
-# repeats_better_dis_k_gi2sm = []
-# repeats_better_dis_k_gi2gm = []
-
- for i, (y, values) in enumerate(y_idx.items()):
- print('\ny =', y)
-# y = 'I'
-# values = y_idx[y]
-
- k = len(values)
-# k = kkk
-
- sod_sm_list = []
- sod_gm_list = []
- dis_k_sm_list = []
- dis_k_gm_list = []
- dis_k_gi_min_list = []
- nb_sod_sm2gm = [0, 0, 0]
- nb_dis_k_sm2gm = [0, 0, 0]
- nb_dis_k_gi2sm = [0, 0, 0]
- nb_dis_k_gi2gm = [0, 0, 0]
- repeats_better_sod_sm2gm = []
- repeats_better_dis_k_sm2gm = []
- repeats_better_dis_k_gi2sm = []
- repeats_better_dis_k_gi2gm = []
-
- for repeat in range(repeats):
- print('\nrepeat =', repeat)
- random.seed(rdn_seed_list[repeat])
- median_set_idx_idx = random.sample(range(0, len(values)), k)
- median_set_idx = [values[idx] for idx in median_set_idx_idx]
- print('median set: ', median_set_idx)
- Gn_median = [Gn[g] for g in values]
-
- sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
- = median_on_k_closest_graphs(Gn_median, node_label, edge_label,
- gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
- edit_costs=None, group_min=median_set_idx_idx,
- dataset=ds_name, parallel=False)
-
- # write result detail.
- sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
- dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
- dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
- dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
- f_detail = open(dir_output + fn_output_detail, 'a')
- csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
- y, repeat,
- sod_sm, sod_gm, dis_k_sm, dis_k_gm,
- dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
- dis_k_gi2gm, median_set_idx])
- f_detail.close()
-
- # compute result summary.
- sod_sm_list.append(sod_sm)
- sod_gm_list.append(sod_gm)
- dis_k_sm_list.append(dis_k_sm)
- dis_k_gm_list.append(dis_k_gm)
- dis_k_gi_min_list.append(dis_k_gi_min)
- # # SOD SM -> GM
- if sod_sm > sod_gm:
- nb_sod_sm2gm[0] += 1
- repeats_better_sod_sm2gm.append(repeat)
- elif sod_sm == sod_gm:
- nb_sod_sm2gm[1] += 1
- elif sod_sm < sod_gm:
- nb_sod_sm2gm[2] += 1
- # # dis_k SM -> GM
- if dis_k_sm > dis_k_gm:
- nb_dis_k_sm2gm[0] += 1
- repeats_better_dis_k_sm2gm.append(repeat)
- elif dis_k_sm == dis_k_gm:
- nb_dis_k_sm2gm[1] += 1
- elif dis_k_sm < dis_k_gm:
- nb_dis_k_sm2gm[2] += 1
- # # dis_k gi -> SM
- if dis_k_gi_min > dis_k_sm:
- nb_dis_k_gi2sm[0] += 1
- repeats_better_dis_k_gi2sm.append(repeat)
- elif dis_k_gi_min == dis_k_sm:
- nb_dis_k_gi2sm[1] += 1
- elif dis_k_gi_min < dis_k_sm:
- nb_dis_k_gi2sm[2] += 1
- # # dis_k gi -> GM
- if dis_k_gi_min > dis_k_gm:
- nb_dis_k_gi2gm[0] += 1
- repeats_better_dis_k_gi2gm.append(repeat)
- elif dis_k_gi_min == dis_k_gm:
- nb_dis_k_gi2gm[1] += 1
- elif dis_k_gi_min < dis_k_gm:
- nb_dis_k_gi2gm[2] += 1
-
- # save median graphs.
- fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
- fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
- + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
- copyfile(fname_sm, fn_pre_sm_new + '.gxl')
- fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
- fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
- + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
- copyfile(fname_gm, fn_pre_gm_new + '.gxl')
- G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
-# reform_attributes(G_best_kernel)
- fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
- + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
- saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib')
-
-# # plot median graphs.
-# set_median = loadGXL(fn_pre_sm_new + '.gxl')
-# gen_median = loadGXL(fn_pre_gm_new + '.gxl')
-# draw_Letter_graph(set_median, fn_pre_sm_new)
-# draw_Letter_graph(gen_median, fn_pre_gm_new)
-# draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
-
-   # write result summary for each class.
- sod_sm_mean_list.append(np.mean(sod_sm_list))
- sod_gm_mean_list.append(np.mean(sod_gm_list))
- dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
- dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
- dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
- sod_sm_mean_list[-1], sod_gm_mean_list[-1],
- dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
- dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
- nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
- repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
- repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
- f_summary.close()
-
-
- # write overall result summary across all classes.
- sod_sm_mean = np.mean(sod_sm_mean_list)
- sod_gm_mean = np.mean(sod_gm_mean_list)
- dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
- dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
- dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)  # mean over per-class means, as for the other metrics
- sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
- dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
- dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
- dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
- f_summary = open(dir_output + fn_output_summary, 'a')
- csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
- sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
- dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
- dis_k_gi2sm_mean, dis_k_gi2gm_mean])
- f_summary.close()
-
-
- print('\ncomplete.')
-
-
-# Draw the current median graph.
-def draw_Letter_graph(graph, file_prefix):
- plt.figure()
- pos = {}
- for n in graph.nodes:
-  pos[n] = np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])  # graph.nodes: NetworkX 2.x API
- nx.draw_networkx(graph, pos)
- plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
-# plt.show()
- plt.close()  # close the figure rather than clearing it, so repeated calls don't accumulate open figures
-
-
-if __name__ == "__main__":
- xp_monoterpenoides()
\ No newline at end of file
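
Editor's note: all three deleted scripts share the same per-class driver: group dataset indices by target with `get_same_item_indices`, reset `k` to the class size, and sample a median set with a per-repeat seed. Below is a self-contained sketch of that loop skeleton; `group_indices_by_target` is a hypothetical stand-in for the gklearn helper, whose assumed behavior is noted in the comments.

```python
import random
from collections import defaultdict

def group_indices_by_target(y_all):
    # assumed behavior of gklearn.preimage.utils.get_same_item_indices:
    # map each target value to the list of dataset indices that carry it.
    y_idx = defaultdict(list)
    for i, y in enumerate(y_all):
        y_idx[y].append(i)
    return dict(y_idx)

# per-class median-set selection, mirroring the deleted experiment loops:
y_all = ['A', 'B', 'A', 'B', 'A']
random.seed(1)
for y, values in group_indices_by_target(y_all).items():
    k = len(values)  # the scripts reset k to the class size
    median_set_idx_idx = random.sample(range(len(values)), k)
    median_set_idx = [values[i] for i in median_set_idx_idx]
    print(y, median_set_idx)
```
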