
rearrange gklearn/preimage directory.

v0.2.x · jajupmochi · 5 years ago · commit 75e42f1838
26 changed files with 0 additions and 10582 deletions
  1. +0 -170  gklearn/preimage/find_best_k.py
  2. +0 -430  gklearn/preimage/fitDistance.py
  3. +0 -467  gklearn/preimage/ged.py
  4. +0 -775  gklearn/preimage/iam.py
  5. +0 -114  gklearn/preimage/knn.py
  6. +0 -6    gklearn/preimage/libs.py
  7. +0 -218  gklearn/preimage/median.py
  8. +0 -201  gklearn/preimage/median_benoit.py
  9. +0 -215  gklearn/preimage/median_linlin.py
 10. +0 -201  gklearn/preimage/pathfrequency.py
 11. +0 -705  gklearn/preimage/preimage_iam.py
 12. +0 -309  gklearn/preimage/preimage_random.py
 13. +0 -122  gklearn/preimage/python_code.py
 14. +0 -83   gklearn/preimage/test.py
 15. +0 -648  gklearn/preimage/test_fitDistance.py
 16. +0 -520  gklearn/preimage/test_ged.py
 17. +0 -964  gklearn/preimage/test_iam.py
 18. +0 -462  gklearn/preimage/test_k_closest_graphs.py
 19. +0 -69   gklearn/preimage/test_median_preimage_generator.py
 20. +0 -686  gklearn/preimage/test_others.py
 21. +0 -620  gklearn/preimage/test_preimage_iam.py
 22. +0 -539  gklearn/preimage/test_preimage_mix.py
 23. +0 -398  gklearn/preimage/test_preimage_random.py
 24. +0 -935  gklearn/preimage/xp_fit_method.py
 25. +0 -476  gklearn/preimage/xp_letter_h.py
 26. +0 -249  gklearn/preimage/xp_monoterpenoides.py

+0 -170  gklearn/preimage/find_best_k.py

@@ -1,170 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 9 11:54:32 2020

@author: ljia
"""
import numpy as np
import random
import csv

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs


def find_best_k():
    ds = {'name': 'monoterpenoides',
          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'])
#    Gn = Gn[0:50]
    gkernel = 'treeletkernel'
    node_label = 'atom'
    edge_label = 'bond_type'
    ds_name = 'mono'
    dir_output = 'results/test_find_best_k/'

    repeats = 50
    k_list = range(2, 11)
    fit_method = 'k-graphs'
    # fitted on the whole dataset - treelet - mono
    edit_costs = [0.1268873773592978, 0.004084633224249829, 0.0897581955378986,
                  0.15328856114451297, 0.3109956881625734, 0.0]

    # create result files.
    fn_output_detail = 'results_detail.' + fit_method + '.csv'
    f_detail = open(dir_output + fn_output_detail, 'a')
    csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
        'repeat', 'median set', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
        'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
        'dis_k gi -> GM'])
    f_detail.close()
    fn_output_summary = 'results_summary.csv'
    f_summary = open(dir_output + fn_output_summary, 'a')
    csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
        'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
        'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
        'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
        '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
        'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
        'repeats better dis_k gi -> GM'])
    f_summary.close()

    random.seed(1)
    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)

    for k in k_list:
        print('\n--------- k =', k, '----------')

        sod_sm_list = []
        sod_gm_list = []
        dis_k_sm_list = []
        dis_k_gm_list = []
        dis_k_gi_min_list = []
        nb_sod_sm2gm = [0, 0, 0]
        nb_dis_k_sm2gm = [0, 0, 0]
        nb_dis_k_gi2sm = [0, 0, 0]
        nb_dis_k_gi2gm = [0, 0, 0]
        repeats_better_sod_sm2gm = []
        repeats_better_dis_k_sm2gm = []
        repeats_better_dis_k_gi2sm = []
        repeats_better_dis_k_gi2gm = []

        for repeat in range(repeats):
            print('\nrepeat =', repeat)
            random.seed(rdn_seed_list[repeat])
            median_set_idx = random.sample(range(0, len(Gn)), k)
            print('median set: ', median_set_idx)

            sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
                = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
                                             fit_method='k-graphs',
                                             edit_costs=edit_costs,
                                             group_min=median_set_idx,
                                             parallel=False)

            # write result detail.
            sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
            dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
            dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
            dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
            f_detail = open(dir_output + fn_output_detail, 'a')
            csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, repeat,
                median_set_idx, sod_sm, sod_gm, dis_k_sm, dis_k_gm,
                dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
                dis_k_gi2gm])
            f_detail.close()

            # compute result summary.
            sod_sm_list.append(sod_sm)
            sod_gm_list.append(sod_gm)
            dis_k_sm_list.append(dis_k_sm)
            dis_k_gm_list.append(dis_k_gm)
            dis_k_gi_min_list.append(dis_k_gi_min)
            # # SOD SM -> GM
            if sod_sm > sod_gm:
                nb_sod_sm2gm[0] += 1
                repeats_better_sod_sm2gm.append(repeat)
            elif sod_sm == sod_gm:
                nb_sod_sm2gm[1] += 1
            elif sod_sm < sod_gm:
                nb_sod_sm2gm[2] += 1
            # # dis_k SM -> GM
            if dis_k_sm > dis_k_gm:
                nb_dis_k_sm2gm[0] += 1
                repeats_better_dis_k_sm2gm.append(repeat)
            elif dis_k_sm == dis_k_gm:
                nb_dis_k_sm2gm[1] += 1
            elif dis_k_sm < dis_k_gm:
                nb_dis_k_sm2gm[2] += 1
            # # dis_k gi -> SM
            if dis_k_gi_min > dis_k_sm:
                nb_dis_k_gi2sm[0] += 1
                repeats_better_dis_k_gi2sm.append(repeat)
            elif dis_k_gi_min == dis_k_sm:
                nb_dis_k_gi2sm[1] += 1
            elif dis_k_gi_min < dis_k_sm:
                nb_dis_k_gi2sm[2] += 1
            # # dis_k gi -> GM
            if dis_k_gi_min > dis_k_gm:
                nb_dis_k_gi2gm[0] += 1
                repeats_better_dis_k_gi2gm.append(repeat)
            elif dis_k_gi_min == dis_k_gm:
                nb_dis_k_gi2gm[1] += 1
            elif dis_k_gi_min < dis_k_gm:
                nb_dis_k_gi2gm[2] += 1

        # write result summary.
        sod_sm_mean = np.mean(sod_sm_list)
        sod_gm_mean = np.mean(sod_gm_list)
        dis_k_sm_mean = np.mean(dis_k_sm_list)
        dis_k_gm_mean = np.mean(dis_k_gm_list)
        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
        f_summary = open(dir_output + fn_output_summary, 'a')
        csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k,
            sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
            dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
            dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
            nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
            repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
            repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
        f_summary.close()

    print('\ncomplete.')
    return


def getRelations(sign):
    if sign == -1:
        return 'better'
    elif sign == 0:
        return 'same'
    elif sign == 1:
        return 'worse'


if __name__ == '__main__':
    find_best_k()
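
The 'SOD SM -> GM'-style columns written by this script encode pairwise comparisons through np.sign. A minimal standalone sketch of the convention (getRelations is copied verbatim from the file; the sample numbers are made up):

import numpy as np

def getRelations(sign):
    # negative difference means the second quantity improved on the first.
    if sign == -1:
        return 'better'
    elif sign == 0:
        return 'same'
    elif sign == 1:
        return 'worse'

# e.g. SOD of the generated median (GM) compared with the set median (SM):
sod_sm, sod_gm = 12.0, 9.5
print(getRelations(np.sign(sod_gm - sod_sm)))  # 'better': GM improved on SM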

+0 -430  gklearn/preimage/fitDistance.py

@@ -1,430 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 16 14:20:06 2019

@author: ljia
"""
import numpy as np
from tqdm import tqdm
from itertools import combinations_with_replacement, combinations
import multiprocessing
from multiprocessing import Pool
from functools import partial
import time
import random
import sys

from scipy import optimize
from scipy.optimize import minimize
import cvxpy as cp

from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
from gklearn.preimage.utils import kernel_distance_matrix


def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
                               params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT',
                                           'method': 'IPFP', 'stabilizer': None},
                               init_costs=[3, 3, 1, 3, 3, 1],
                               dataset='monoterpenoides', Kmatrix=None,
                               parallel=True):
#    dataset = dataset.lower()

    # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
#    random.seed(1)
#    cost_rdm = random.sample(range(1, 10), 6)
#    init_costs = cost_rdm + [0]
#    init_costs = cost_rdm
#    init_costs = [3, 3, 1, 3, 3, 1]
#    init_costs = [i * 0.01 for i in cost_rdm] + [0]
#    init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
#    init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
#    init_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0]
#    idx_cost_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]

    # compute distances in feature space.
    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
                                                Kmatrix=Kmatrix, gkernel=gkernel)
    dis_k_vec = []
    for i in range(len(dis_k_mat)):
#        for j in range(i, len(dis_k_mat)):
        for j in range(i + 1, len(dis_k_mat)):
            dis_k_vec.append(dis_k_mat[i, j])
    dis_k_vec = np.array(dis_k_vec)

    # init ged.
    print('\ninitial:')
    time0 = time.time()
    params_ged['dataset'] = dataset
    params_ged['edit_cost_constant'] = init_costs
    ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
                                                            parallel=parallel)
    residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
    time_list = [time.time() - time0]
    edit_cost_list = [init_costs]
    nb_cost_mat = np.array(n_edit_operations)
    nb_cost_mat_list = [nb_cost_mat]
    print('edit_costs:', init_costs)
    print('residual_list:', residual_list)

    for itr in range(itr_max):
        print('\niteration', itr)
        time0 = time.time()
        # "fit" geds to distances in feature space by tuning edit costs using the
        # Least Squares Method.
        np.savez('results/xp_fit_method/fit_data_debug' + str(itr) + '.gm',
                 nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec,
                 n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
                 ged_mat=ged_mat)
        edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec,
                                                dataset=dataset, cost=params_ged['cost'])
        for i in range(len(edit_costs_new)):
            if -1e-9 <= edit_costs_new[i] <= 1e-9:
                edit_costs_new[i] = 0
            if edit_costs_new[i] < 0:
                raise ValueError('The edit cost is negative.')
#        for i in range(len(edit_costs_new)):
#            if edit_costs_new[i] < 0:
#                edit_costs_new[i] = 0

        # compute new GEDs and numbers of edit operations.
        params_ged['edit_cost_constant'] = edit_costs_new  # np.array([edit_costs_new[0], edit_costs_new[1], 0.75])
        ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
                                                           parallel=parallel)
        residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
        time_list.append(time.time() - time0)
        edit_cost_list.append(edit_costs_new)
        nb_cost_mat = np.array(n_edit_operations)
        nb_cost_mat_list.append(nb_cost_mat)
        print('edit_costs:', edit_costs_new)
        print('residual_list:', residual_list)

    return edit_costs_new, residual_list, edit_cost_list, dis_k_mat, ged_mat, \
        time_list, nb_cost_mat_list


def compute_geds(Gn, params_ged, parallel=False):
    edit_cost_name = params_ged['cost']
    if edit_cost_name == 'LETTER' or edit_cost_name == 'LETTER2':
        get_nb_eo = get_nb_edit_operations_letter
    elif edit_cost_name == 'NON_SYMBOLIC':
        get_nb_eo = get_nb_edit_operations_nonsymbolic
    else:
        get_nb_eo = get_nb_edit_operations

    ged_mat = np.zeros((len(Gn), len(Gn)))
    if parallel:
#        print('parallel')
#        len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
        len_itr = int(len(Gn) * (len(Gn) - 1) / 2)
        ged_vec = [0 for i in range(len_itr)]
        n_edit_operations = [0 for i in range(len_itr)]
#        itr = combinations_with_replacement(range(0, len(Gn)), 2)
        itr = combinations(range(0, len(Gn)), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = int(len_itr / n_jobs) + 1
        else:
            chunksize = 100

        def init_worker(gn_toshare):
            global G_gn
            G_gn = gn_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo)
        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,))
        iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                        desc='computing GEDs', file=sys.stdout)
#        iterator = pool.imap_unordered(do_partial, itr, chunksize)
        for i, j, dis, n_eo_tmp in iterator:
            idx_itr = int(len(Gn) * i + j - (i + 1) * (i + 2) / 2)
            ged_vec[idx_itr] = dis
            ged_mat[i][j] = dis
            ged_mat[j][i] = dis
            n_edit_operations[idx_itr] = n_eo_tmp
#            print('\n-------------------------------------------')
#            print(i, j, idx_itr, dis)
        pool.close()
        pool.join()
    else:
        ged_vec = []
        n_edit_operations = []
        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
#        for i in range(len(Gn)):
            for j in range(i + 1, len(Gn)):
                dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], **params_ged)
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward)
                n_edit_operations.append(n_eo_tmp)
    return ged_vec, ged_mat, n_edit_operations
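
The flat index computed in the parallel branch above maps a pair (i, j) with j > i to its position in the row-major enumeration of the strict upper triangle, which is exactly the order itertools.combinations produces. A quick standalone check of the formula:

from itertools import combinations

n = 5  # any small number of graphs
for idx, (i, j) in enumerate(combinations(range(n), 2)):
    assert idx == int(n * i + j - (i + 1) * (i + 2) / 2)
print('index formula matches combinations order for n =', n)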

def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr):
    i = itr[0]
    j = itr[1]
    dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged, get_nb_eo)
    return i, j, dis, n_eo_tmp


def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo):
    dis, pi_forward, pi_backward = GED(g1, g2, **params_ged)
    n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward)  # [0,0,0,0,0,0]
    return dis, n_eo_tmp


def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides',
                 cost='CONSTANT', rw_constraints='inequality'):
#    if dataset == 'Letter-high':
    if cost == 'LETTER':
        pass
#        # method 1: set alpha automatically, just tune c_vir and c_eir by
#        # LMS using cvxpy.
#        alpha = 0.5
#        coeff = 100  # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
##        if np.count_nonzero(nb_cost_mat[:,4]) == 0:
##            alpha = 0.75
##        else:
##            alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
##            alpha = alpha * 0.99
#        param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
#        param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
#        nb_cost_mat_new = np.column_stack((param_vir, param_eir))
#        dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
#
#        x = cp.Variable(nb_cost_mat_new.shape[1])
#        cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
#        constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
#        prob = cp.Problem(cp.Minimize(cost), constraints)
#        prob.solve()
#        edit_costs_new = x.value
#        edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
#        residual = np.sqrt(prob.value)

#        # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
#        # scipy.optimize.minimize.
#        w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
#        w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
#        w2 = nb_cost_mat[:,3]
#        w3 = dis_k_vec
#        func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
#                                     + w2 * x[2] - w3 * x[3]) ** 2)
#        bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
#        res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
#        edit_costs_new = res.x[0:3]
#        residual = res.fun

        # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.

#        # method 4: tune c_vir, c_eir and alpha by QP function
#        # scipy.optimize.least_squares. An initial guess is required.
#        w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
#        w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
#        w2 = nb_cost_mat[:,3]
#        w3 = dis_k_vec
#        func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
#                          + w2 * x[2] - w3 * x[3]) ** 2
#        res = optimize.root(func, [0.9, 1.7, 0.75, 100])
#        edit_costs_new = res.x
#        residual = None
    elif cost == 'LETTER2':
#        # 1. if c_vi != c_vr, c_ei != c_er.
#        nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
#        x = cp.Variable(nb_cost_mat_new.shape[1])
#        cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
##        # 1.1 no constraints.
##        constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
#        # 1.2 c_vs <= c_vi + c_vr.
#        constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
#                       np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
##        # 2. if c_vi == c_vr, c_ei == c_er.
##        nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
##        nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
##        nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
##        x = cp.Variable(nb_cost_mat_new.shape[1])
##        cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
##        # 2.1 no constraints.
##        constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
###        # 2.2 c_vs <= c_vi + c_vr.
###        constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
###                       np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
#
#        prob = cp.Problem(cp.Minimize(cost_fun), constraints)
#        prob.solve()
#        edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
#        edit_costs_new = np.array(edit_costs_new)
#        residual = np.sqrt(prob.value)
        if rw_constraints == 'inequality':
            # c_vs <= c_vi + c_vr.
            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
            x = cp.Variable(nb_cost_mat_new.shape[1])
            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
            constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
                           np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
            try:
                prob.solve(verbose=True)
            except MemoryError as error0:
                print('\nUsing solver "OSQP" caused a memory error.')
                print('the original error message is\n', error0)
                print('solver status: ', prob.status)
                print('trying solver "CVXOPT" instead...\n')
                try:
                    prob.solve(solver=cp.CVXOPT, verbose=True)
                except Exception as error1:
                    print('\nAn error occurred when using solver "CVXOPT".')
                    print('the original error message is\n', error1)
                    print('solver status: ', prob.status)
                    print('trying solver "MOSEK" instead. Notice this solver is commercial and a license is required.\n')
                    prob.solve(solver=cp.MOSEK, verbose=True)
                else:
                    print('solver status: ', prob.status)
            else:
                print('solver status: ', prob.status)
            print()
            edit_costs_new = x.value
            residual = np.sqrt(prob.value)
        elif rw_constraints == '2constraints':
            # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
            x = cp.Variable(nb_cost_mat_new.shape[1])
            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
            constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                           np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
                           np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
                           np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
            prob.solve()
            edit_costs_new = x.value
            residual = np.sqrt(prob.value)
        elif rw_constraints == 'no-constraint':
            # no constraint.
            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
            x = cp.Variable(nb_cost_mat_new.shape[1])
            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
            constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
            prob.solve()
            edit_costs_new = x.value
            residual = np.sqrt(prob.value)
#        elif method == 'inequality_modified':
#            # c_vs <= c_vi + c_vr.
#            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
#            x = cp.Variable(nb_cost_mat_new.shape[1])
#            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
#            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
#                           np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
#            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
#            prob.solve()
#            # use same costs for insertion and removal rather than the fitted costs.
#            edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
#            edit_costs_new = np.array(edit_costs_new)
#            residual = np.sqrt(prob.value)
    elif cost == 'NON_SYMBOLIC':
        is_n_attr = np.count_nonzero(nb_cost_mat[:,2])
        is_e_attr = np.count_nonzero(nb_cost_mat[:,5])

        if dataset == 'SYNTHETICnew':
#            nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
            nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
            x = cp.Variable(nb_cost_mat_new.shape[1])
            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
#            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
#                           np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
#            constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]]
            constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])],
                           np.array([0.0, 1.0, -1.0]).T@x == 0.0]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
            prob.solve()
#            print(x.value)
            edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value,
                                             np.array([0.0])))
            residual = np.sqrt(prob.value)
        elif rw_constraints == 'inequality':
            # c_vs <= c_vi + c_vr.
            if is_n_attr and is_e_attr:
                nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
                x = cp.Variable(nb_cost_mat_new.shape[1])
                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                               np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                               np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                prob.solve()
                edit_costs_new = x.value
                residual = np.sqrt(prob.value)
            elif is_n_attr and not is_e_attr:
                nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
                x = cp.Variable(nb_cost_mat_new.shape[1])
                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
                               np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                prob.solve()
                print(x.value)
                edit_costs_new = np.concatenate((x.value, np.array([0.0])))
                residual = np.sqrt(prob.value)
            elif not is_n_attr and is_e_attr:
                nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
                x = cp.Variable(nb_cost_mat_new.shape[1])
                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                               np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                prob.solve()
                edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
                residual = np.sqrt(prob.value)
            else:
                nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
                x = cp.Variable(nb_cost_mat_new.shape[1])
                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                prob.solve()
                edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
                                                 x.value[2:], np.array([0.0])))
                residual = np.sqrt(prob.value)
    else:
#        # method 1: simple least square method.
#        edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
#                                                         rcond=None)

#        # method 2: least square method with x_i >= 0.
#        edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)

        # method 3: solve as a quadratic program with constraints.
#        P = np.dot(nb_cost_mat.T, nb_cost_mat)
#        q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
#        G = -1 * np.identity(nb_cost_mat.shape[1])
#        h = np.array([0 for i in range(nb_cost_mat.shape[1])])
#        A = np.array([1 for i in range(nb_cost_mat.shape[1])])
#        b = 1
#        x = cp.Variable(nb_cost_mat.shape[1])
#        prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
#                          [G@x <= h])
#        prob.solve()
#        edit_costs_new = x.value
#        residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)

#        G = -1 * np.identity(nb_cost_mat.shape[1])
#        h = np.array([0 for i in range(nb_cost_mat.shape[1])])
        x = cp.Variable(nb_cost_mat.shape[1])
        cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
        constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
#                       np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
                       np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                       np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
        prob = cp.Problem(cp.Minimize(cost_fun), constraints)
        prob.solve()
        edit_costs_new = x.value
        residual = np.sqrt(prob.value)

        # method 4:

    return edit_costs_new, residual


if __name__ == '__main__':
    print('check test_fitDistance.py')
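
For fixed node maps, the GED used here is linear in the edit-operation counts, ged(g1, g2) ≈ n_vi·c_vi + n_vr·c_vr + n_vs·c_vs + n_ei·c_ei + n_er·c_er + n_es·c_es, which is why update_costs can fit the costs by constrained least squares. A minimal self-contained sketch of that fit on synthetic data (the matrix N and targets d are made up, not gklearn output; the constraint rows mirror the CONSTANT branch above):

import numpy as np
import cvxpy as cp

# synthetic data: 10 graph pairs, 6 edit-operation counts per pair
# (n_vi, n_vr, n_vs, n_ei, n_er, n_es), and noisy target distances.
rng = np.random.default_rng(0)
N = rng.integers(0, 5, size=(10, 6)).astype(float)
d = N @ np.array([3.0, 3.0, 1.0, 3.0, 3.0, 1.0]) + rng.normal(0, 0.1, 10)

c = cp.Variable(6)
objective = cp.Minimize(cp.sum_squares(N @ c - d))
constraints = [c >= 0.0,
               # substitution no dearer than removal + insertion:
               c[0] + c[1] >= c[2],   # nodes: c_vi + c_vr >= c_vs
               c[3] + c[4] >= c[5]]   # edges: c_ei + c_er >= c_es
prob = cp.Problem(objective, constraints)
prob.solve()
print('fitted costs:', np.round(c.value, 3))
print('residual:', np.sqrt(prob.value))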

+0 -467  gklearn/preimage/ged.py

@@ -1,467 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 17 18:44:59 2019

@author: ljia
"""
import numpy as np
import networkx as nx
from tqdm import tqdm
import sys
import multiprocessing
from multiprocessing import Pool
from functools import partial

#from gedlibpy_linlin import librariesImport, gedlibpy
from gklearn.gedlib import librariesImport, gedlibpy


def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP',
        edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50):
    """
    Compute GED for 2 graphs.
    """
#    dataset = dataset.lower()
    if lib == 'gedlibpy':
        gedlibpy.restart_env()
        gedlibpy.add_nx_graph(convertGraph(g1, cost), "")
        gedlibpy.add_nx_graph(convertGraph(g2, cost), "")

        listID = gedlibpy.get_all_graph_ids()
        gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
        gedlibpy.init()
        gedlibpy.set_method(method, algo_options)
        gedlibpy.init_method()

        g = listID[0]
        h = listID[1]
        if stabilizer is None:
            gedlibpy.run_method(g, h)
            pi_forward = gedlibpy.get_forward_map(g, h)
            pi_backward = gedlibpy.get_backward_map(g, h)
            upper = gedlibpy.get_upper_bound(g, h)
            lower = gedlibpy.get_lower_bound(g, h)
        elif stabilizer == 'mean':
            # @todo: to be finished...
            upper_list = [np.inf] * repeat
            for itr in range(repeat):
                gedlibpy.run_method(g, h)
                upper_list[itr] = gedlibpy.get_upper_bound(g, h)
            pi_forward = gedlibpy.get_forward_map(g, h)
            pi_backward = gedlibpy.get_backward_map(g, h)
            lower = gedlibpy.get_lower_bound(g, h)
            upper = np.mean(upper_list)
        elif stabilizer == 'median':
            if repeat % 2 == 0:
                repeat += 1
            upper_list = [np.inf] * repeat
            pi_forward_list = [0] * repeat
            pi_backward_list = [0] * repeat
            for itr in range(repeat):
                gedlibpy.run_method(g, h)
                upper_list[itr] = gedlibpy.get_upper_bound(g, h)
                pi_forward_list[itr] = gedlibpy.get_forward_map(g, h)
                pi_backward_list[itr] = gedlibpy.get_backward_map(g, h)
                lower = gedlibpy.get_lower_bound(g, h)
            upper = np.median(upper_list)
            idx_median = upper_list.index(upper)
            pi_forward = pi_forward_list[idx_median]
            pi_backward = pi_backward_list[idx_median]
        elif stabilizer == 'min':
            upper = np.inf
            for itr in range(repeat):
                gedlibpy.run_method(g, h)
                upper_tmp = gedlibpy.get_upper_bound(g, h)
                if upper_tmp < upper:
                    upper = upper_tmp
                    pi_forward = gedlibpy.get_forward_map(g, h)
                    pi_backward = gedlibpy.get_backward_map(g, h)
                    lower = gedlibpy.get_lower_bound(g, h)
                if upper == 0:
                    break
        elif stabilizer == 'max':
            upper = 0
            for itr in range(repeat):
                gedlibpy.run_method(g, h)
                upper_tmp = gedlibpy.get_upper_bound(g, h)
                if upper_tmp > upper:
                    upper = upper_tmp
                    pi_forward = gedlibpy.get_forward_map(g, h)
                    pi_backward = gedlibpy.get_backward_map(g, h)
                    lower = gedlibpy.get_lower_bound(g, h)
        elif stabilizer == 'gaussian':
            pass

        dis = upper
    elif lib == 'gedlib-bash':
        import time
        import random
        import os
        from gklearn.utils.graphfiles import saveDataset

        tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)
        fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
        xparams = {'method': 'gedlib', 'graph_dir': fn_collection}
        saveDataset([g1, g2], ['dummy', 'dummy'], gformat='gxl', group='xml',
                    filename=fn_collection, xparams=xparams)

        command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
        command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
        command += 'export LD_LIBRARY_PATH\n'
        command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
        command += './ged_for_python_bash monoterpenoides ' + fn_collection \
            + ' \'' + algo_options + '\' '
        for ec in edit_cost_constant:
            command += str(ec) + ' '
#        output = os.system(command)
        stream = os.popen(command)
        output = stream.readlines()
#        print(output)

        dis = float(output[0].strip())
        runtime = float(output[1].strip())
        size_forward = int(output[2].strip())
        pi_forward = [int(item.strip()) for item in output[3:3 + size_forward]]
        pi_backward = [int(item.strip()) for item in output[3 + size_forward:]]
#        print(dis)
#        print(runtime)
#        print(size_forward)
#        print(pi_forward)
#        print(pi_backward)

    # make the map label correct (label removed maps as np.inf).
    nodes1 = [n for n in g1.nodes()]
    nodes2 = [n for n in g2.nodes()]
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
#    print(pi_forward)

    return dis, pi_forward, pi_backward


def convertGraph(G, cost):
    """Convert a graph to the proper NetworkX format that can be
    recognized by library gedlibpy.
    """
    G_new = nx.Graph()
    if cost == 'LETTER' or cost == 'LETTER2':
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
                           y=str(attrs['attributes'][1]))
        for nd1, nd2, attrs in G.edges(data=True):
            G_new.add_edge(str(nd1), str(nd2))
    elif cost == 'NON_SYMBOLIC':
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd))
            for a_name in G.graph['node_attrs']:
                G_new.nodes[str(nd)][a_name] = str(attrs[a_name])
        for nd1, nd2, attrs in G.edges(data=True):
            G_new.add_edge(str(nd1), str(nd2))
            for a_name in G.graph['edge_attrs']:
                G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name])
    else:
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        for nd1, nd2, attrs in G.edges(data=True):
            G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
#            G_new.add_edge(str(nd1), str(nd2))
    return G_new


def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
          edit_cost_constant=[], stabilizer='min', repeat=50):
    """
    Compute GEDs for a group of graphs.
    """
    if lib == 'gedlibpy':
        def convertGraph(G):
            """Convert a graph to the proper NetworkX format that can be
            recognized by library gedlibpy.
            """
            G_new = nx.Graph()
            for nd, attrs in G.nodes(data=True):
                G_new.add_node(str(nd), chem=attrs['atom'])
            for nd1, nd2, attrs in G.edges(data=True):
#                G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
                G_new.add_edge(str(nd1), str(nd2))
            return G_new

        # note: the original code referenced g1 and g2 without defining them;
        # assuming the first two graphs of Gn were intended.
        g1, g2 = Gn[0], Gn[1]
        gedlibpy.restart_env()
        gedlibpy.add_nx_graph(convertGraph(g1), "")
        gedlibpy.add_nx_graph(convertGraph(g2), "")

        listID = gedlibpy.get_all_graph_ids()
        gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
        gedlibpy.init()
        gedlibpy.set_method(method, "")
        gedlibpy.init_method()

        g = listID[0]
        h = listID[1]
        if stabilizer is None:
            gedlibpy.run_method(g, h)
            pi_forward = gedlibpy.get_forward_map(g, h)
            pi_backward = gedlibpy.get_backward_map(g, h)
            upper = gedlibpy.get_upper_bound(g, h)
            lower = gedlibpy.get_lower_bound(g, h)
        elif stabilizer == 'min':
            upper = np.inf
            for itr in range(repeat):
                gedlibpy.run_method(g, h)
                upper_tmp = gedlibpy.get_upper_bound(g, h)
                if upper_tmp < upper:
                    upper = upper_tmp
                    pi_forward = gedlibpy.get_forward_map(g, h)
                    pi_backward = gedlibpy.get_backward_map(g, h)
                    lower = gedlibpy.get_lower_bound(g, h)
                if upper == 0:
                    break

        dis = upper

    # make the map label correct (label removed maps as np.inf).
    nodes1 = [n for n in g1.nodes()]
    nodes2 = [n for n in g2.nodes()]
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return dis, pi_forward, pi_backward


def ged_median(Gn, Gn_median, verbose=False, params_ged={'lib': 'gedlibpy',
               'cost': 'CHEM_1', 'method': 'IPFP', 'edit_cost_constant': [],
               'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1',
               'stabilizer': None}, parallel=False):
    if parallel:
        len_itr = int(len(Gn))
        pi_forward_list = [[] for i in range(len_itr)]
        dis_list = [0 for i in range(len_itr)]
        itr = range(0, len_itr)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = int(len_itr / n_jobs) + 1
        else:
            chunksize = 100

        def init_worker(gn_toshare, gn_median_toshare):
            global G_gn, G_gn_median
            G_gn = gn_toshare
            G_gn_median = gn_median_toshare

        do_partial = partial(_compute_ged_median, params_ged)
        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn, Gn_median))
        if verbose:
            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                            desc='computing GEDs', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
        for i, dis_sum, pi_forward in iterator:
            pi_forward_list[i] = pi_forward
            dis_list[i] = dis_sum
#            print('\n-------------------------------------------')
#            print(i, j, idx_itr, dis)
        pool.close()
        pool.join()
    else:
        dis_list = []
        pi_forward_list = []
        for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
                           file=sys.stdout) if verbose else enumerate(Gn):
            dis_sum = 0
            pi_forward_list.append([])
            for G_p in Gn_median:
                dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
                                                               **params_ged)
                pi_forward_list[idx].append(pi_tmp_forward)
                dis_sum += dis_tmp
            dis_list.append(dis_sum)
    return dis_list, pi_forward_list


def _compute_ged_median(params_ged, itr):
#    print(itr)
    dis_sum = 0
    pi_forward = []
    for G_p in G_gn_median:
        dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_gn[itr], G_p,
                                                       **params_ged)
        pi_forward.append(pi_tmp_forward)
        dis_sum += dis_tmp
    return itr, dis_sum, pi_forward


def get_nb_edit_operations(g1, g2, forward_map, backward_map):
    """Compute the number of each edit operations.
    """
    n_vi = 0
    n_vr = 0
    n_vs = 0
    n_ei = 0
    n_er = 0
    n_es = 0

    nodes1 = [n for n in g1.nodes()]
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        # .nodes (NetworkX 2.x); the original used the removed .node accessor.
        elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
            n_vs += 1
    for map_i in backward_map:
        if map_i == np.inf:
            n_vi += 1

#    idx_nodes1 = range(0, len(node1))

    edges1 = [e for e in g1.edges()]
    nb_edges2_cnted = 0
    for n1, n2 in edges1:
        idx1 = nodes1.index(n1)
        idx2 = nodes1.index(n2)
        # one of the nodes is removed, thus the edge is removed.
        if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
            n_er += 1
        # corresponding edge is in g2.
        elif (forward_map[idx1], forward_map[idx2]) in g2.edges():
            nb_edges2_cnted += 1
            # edge labels are different.
            if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \
                    != g1.edges[(n1, n2)]['bond_type']:
                n_es += 1
        elif (forward_map[idx2], forward_map[idx1]) in g2.edges():
            nb_edges2_cnted += 1
            # edge labels are different.
            if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \
                    != g1.edges[(n1, n2)]['bond_type']:
                n_es += 1
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
    n_ei = nx.number_of_edges(g2) - nb_edges2_cnted

    return n_vi, n_vr, n_vs, n_ei, n_er, n_es


def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
    """Compute the number of each edit operations.
    """
    n_vi = 0
    n_vr = 0
    n_vs = 0
    sod_vs = 0
    n_ei = 0
    n_er = 0

    nodes1 = [n for n in g1.nodes()]
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        else:
            n_vs += 1
            diff_x = float(g1.nodes[nodes1[i]]['x']) - float(g2.nodes[map_i]['x'])
            diff_y = float(g1.nodes[nodes1[i]]['y']) - float(g2.nodes[map_i]['y'])
            sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y))
    for map_i in backward_map:
        if map_i == np.inf:
            n_vi += 1

#    idx_nodes1 = range(0, len(node1))

    edges1 = [e for e in g1.edges()]
    nb_edges2_cnted = 0
    for n1, n2 in edges1:
        idx1 = nodes1.index(n1)
        idx2 = nodes1.index(n2)
        # one of the nodes is removed, thus the edge is removed.
        if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
            n_er += 1
        # corresponding edge is in g2. Edge label is not considered.
        elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \
                (forward_map[idx2], forward_map[idx1]) in g2.edges():
            nb_edges2_cnted += 1
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
    n_ei = nx.number_of_edges(g2) - nb_edges2_cnted

    return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er


def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
    """Compute the number of each edit operations.
    """
    n_vi = 0
    n_vr = 0
    n_vs = 0
    sod_vs = 0
    n_ei = 0
    n_er = 0
    n_es = 0
    sod_es = 0

    nodes1 = [n for n in g1.nodes()]
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        else:
            n_vs += 1
            sum_squares = 0
            for a_name in g1.graph['node_attrs']:
                diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
                sum_squares += np.square(diff)
            sod_vs += np.sqrt(sum_squares)
    for map_i in backward_map:
        if map_i == np.inf:
            n_vi += 1

#    idx_nodes1 = range(0, len(node1))

    edges1 = [e for e in g1.edges()]
    for n1, n2 in edges1:
        idx1 = nodes1.index(n1)
        idx2 = nodes1.index(n2)
        n1_g2 = forward_map[idx1]
        n2_g2 = forward_map[idx2]
        # one of the nodes is removed, thus the edge is removed.
        if n1_g2 == np.inf or n2_g2 == np.inf:
            n_er += 1
        # corresponding edge is in g2.
        elif (n1_g2, n2_g2) in g2.edges():
            n_es += 1
            sum_squares = 0
            for a_name in g1.graph['edge_attrs']:
                # edge attributes live on g2.edges; the original mistakenly read g2.nodes here.
                diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name])
                sum_squares += np.square(diff)
            sod_es += np.sqrt(sum_squares)
        elif (n2_g2, n1_g2) in g2.edges():
            n_es += 1
            sum_squares = 0
            for a_name in g1.graph['edge_attrs']:
                # same fix as above: g2.edges, not g2.nodes.
                diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name])
                sum_squares += np.square(diff)
            sod_es += np.sqrt(sum_squares)
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
    n_ei = nx.number_of_edges(g2) - n_es

    return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es


if __name__ == '__main__':
    print('check test_ged.py')
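
For reference, a typical call into this wrapper looked like the sketch below (assuming gklearn's bundled gedlib bindings are compiled and importable; the graphs use the 'atom'/'bond_type' labels expected by convertGraph's default branch):

import networkx as nx
from gklearn.preimage.ged import GED, get_nb_edit_operations  # module deleted in this commit

g1 = nx.Graph()
g1.add_node(0, atom='C'); g1.add_node(1, atom='O')
g1.add_edge(0, 1, bond_type='1')

g2 = nx.Graph()
g2.add_node(0, atom='C'); g2.add_node(1, atom='N')
g2.add_edge(0, 1, bond_type='2')

dis, pi_forward, pi_backward = GED(g1, g2, cost='CHEM_1', method='IPFP',
                                   stabilizer='min', repeat=10)
n_eo = get_nb_edit_operations(g1, g2, pi_forward, pi_backward)
print(dis, n_eo)  # upper bound on GED and (n_vi, n_vr, n_vs, n_ei, n_er, n_es)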

+0 -775  gklearn/preimage/iam.py

@@ -1,775 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 26 11:49:12 2019

Iterative alternate minimizations using GED.
@author: ljia
"""
import numpy as np
import random
import networkx as nx
from tqdm import tqdm

from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
from gklearn.preimage.ged import GED, ged_median


def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
epsilon=0.001, node_label='atom', edge_label='bond_type',
connected=False, removeNodes=True, allBestInit=False, allBestNodes=False,
allBestEdges=False, allBestOutput=False,
params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP',
'edit_cost_constant': [], 'stabilizer': None,
'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'}):
"""See my name, then you know what I do.
"""
# Gn_median = Gn_median[0:10]
# Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
node_ir = np.inf # corresponding to the node remove and insertion.
label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable.
ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
edge_label=edge_label)
node_label_set = get_node_labels(Gn_median, node_label)
edge_label_set = get_edge_labels(Gn_median, edge_label)

def generate_graph(G, pi_p_forward):
G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
# nx.draw_networkx(G)
# import matplotlib.pyplot as plt
# plt.show()
# print(pi_p_forward)
# update vertex labels.
# pre-compute h_i0 for each label.
# for label in get_node_labels(Gn, node_label):
# print(label)
# for nd in G.nodes(data=True):
# pass
if not ds_attrs['node_attr_dim']: # labels are symbolic
for ndi, (nd, _) in enumerate(G.nodes(data=True)):
h_i0_list = []
label_list = []
for label in node_label_set:
h_i0 = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][ndi]
if pi_i != node_ir and g.nodes[pi_i][node_label] == label:
h_i0 += 1
h_i0_list.append(h_i0)
label_list.append(label)
# case when the node is to be removed.
if removeNodes:
h_i0_remove = 0 # @todo: maybe this can be added to the node_label_set above.
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][ndi]
if pi_i == node_ir:
h_i0_remove += 1
h_i0_list.append(h_i0_remove)
label_list.append(label_r)
# get the best labels.
idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
if allBestNodes: # choose all best graphs.
nlabel_best = [label_list[idx] for idx in idx_max]
# generate "best" graphs with regard to "best" node labels.
G_new_list_nd = []
for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
for nl in nlabel_best:
g_tmp = g.copy()
if nl == label_r:
g_tmp.remove_node(nd)
else:
g_tmp.nodes[nd][node_label] = nl
G_new_list_nd.append(g_tmp)
# nx.draw_networkx(g_tmp)
# import matplotlib.pyplot as plt
# plt.show()
# print(g_tmp.nodes(data=True))
# print(g_tmp.edges(data=True))
G_new_list = [ggg.copy() for ggg in G_new_list_nd]
else:
# choose one of the best randomly.
idx_rdm = random.randint(0, len(idx_max) - 1)
best_label = label_list[idx_max[idx_rdm]]
h_i0_max = h_i0_list[idx_max[idx_rdm]]

g_new = G_new_list[0]
if best_label == label_r:
g_new.remove_node(nd)
else:
g_new.nodes[nd][node_label] = best_label
G_new_list = [g_new]
else: # labels are non-symbolic
for ndi, (nd, _) in enumerate(G.nodes(data=True)):
Si_norm = 0
phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][ndi]
if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
Si_norm += 1
phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
phi_i_bar /= Si_norm
G_new_list[0].nodes[nd]['attributes'] = phi_i_bar
# for g in G_new_list:
# import matplotlib.pyplot as plt
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# update edge labels and adjacency matrix.
if ds_attrs['edge_labeled']:
G_new_list_edge = []
for g_new in G_new_list:
nd_list = [n for n in g_new.nodes()]
g_tmp_list = [g_new.copy()]
for nd1i in range(nx.number_of_nodes(g_new)):
nd1 = nd_list[nd1i]# @todo: not just edges, but all pairs of nodes
for nd2i in range(nd1i + 1, nx.number_of_nodes(g_new)):
nd2 = nd_list[nd2i]
# for nd1, nd2, _ in g_new.edges(data=True):
h_ij0_list = []
label_list = []
for label in edge_label_set:
h_ij0 = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][nd1i]
pi_j = pi_p_forward[idx][nd2i]
h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
g.has_edge(pi_i, pi_j) and
g.edges[pi_i, pi_j][edge_label] == label)
h_ij0 += h_ij0_p
h_ij0_list.append(h_ij0)
label_list.append(label)
# get the best labels.
idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
if allBestEdges: # choose all best graphs.
elabel_best = [label_list[idx] for idx in idx_max]
h_ij0_max = [h_ij0_list[idx] for idx in idx_max]
# generate "best" graphs with regard to "best" node labels.
G_new_list_ed = []
for g_tmp in g_tmp_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
for idxl, el in enumerate(elabel_best):
g_tmp_copy = g_tmp.copy()
# check whether a_ij is 0 or 1.
sij_norm = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][nd1i]
pi_j = pi_p_forward[idx][nd2i]
if g.has_node(pi_i) and g.has_node(pi_j) and \
g.has_edge(pi_i, pi_j):
sij_norm += 1
if h_ij0_max[idxl] > len(Gn_median) * c_er / c_es + \
sij_norm * (1 - (c_er + c_ei) / c_es):
if not g_tmp_copy.has_edge(nd1, nd2):
g_tmp_copy.add_edge(nd1, nd2)
g_tmp_copy.edges[nd1, nd2][edge_label] = elabel_best[idxl]
else:
if g_tmp_copy.has_edge(nd1, nd2):
g_tmp_copy.remove_edge(nd1, nd2)
G_new_list_ed.append(g_tmp_copy)
g_tmp_list = [ggg.copy() for ggg in G_new_list_ed]
else: # choose one of the best randomly.
idx_rdm = random.randint(0, len(idx_max) - 1)
best_label = label_list[idx_max[idx_rdm]]
h_ij0_max = h_ij0_list[idx_max[idx_rdm]]
# check whether a_ij is 0 or 1.
sij_norm = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][nd1i]
pi_j = pi_p_forward[idx][nd2i]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
if not g_new.has_edge(nd1, nd2):
g_new.add_edge(nd1, nd2)
g_new.edges[nd1, nd2][edge_label] = best_label
else:
# elif h_ij0_max < len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
if g_new.has_edge(nd1, nd2):
g_new.remove_edge(nd1, nd2)
g_tmp_list = [g_new]
G_new_list_edge += g_tmp_list
G_new_list = [ggg.copy() for ggg in G_new_list_edge]
else: # if edges are unlabeled
# @todo: is this even right? G or g_tmp? check if the new one is right
# @todo: works only for undirected graphs.
for g_tmp in G_new_list:
nd_list = [n for n in g_tmp.nodes()]
for nd1i in range(nx.number_of_nodes(g_tmp)):
nd1 = nd_list[nd1i]
for nd2i in range(nd1i + 1, nx.number_of_nodes(g_tmp)):
nd2 = nd_list[nd2i]
sij_norm = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][nd1i]
pi_j = pi_p_forward[idx][nd2i]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
# @todo: should we consider if nd1 and nd2 in g_tmp?
# or just add the edge anyway?
if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
and not g_tmp.has_edge(nd1, nd2):
g_tmp.add_edge(nd1, nd2)
else: # @todo: which to use?
# elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
if g_tmp.has_edge(nd1, nd2):
g_tmp.remove_edge(nd1, nd2)
# do not change anything when equal.
# for i, g in enumerate(G_new_list):
# import matplotlib.pyplot as plt
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# # find the best graph generated in this iteration and update pi_p.
# @todo: should we update all graphs generated or just the best ones?
dis_list, pi_forward_list = ged_median(G_new_list, Gn_median,
params_ged=params_ged)
# @todo: should we remove the identical and connectivity check?
# Don't know which is faster.
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_new_list, idx_list = remove_duplicates(G_new_list)
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
dis_list = [dis_list[idx] for idx in idx_list]
# if connected == True:
# G_new_list, idx_list = remove_disconnected(G_new_list)
# pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
# idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
# dis_min = dis_list[idx_min_tmp_list[0]]
# pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
# G_new_list = [G_new_list[idx] for idx in idx_min_list]
# for g in G_new_list:
# import matplotlib.pyplot as plt
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
return G_new_list, pi_forward_list, dis_list
def best_median_graphs(Gn_candidate, pi_all_forward, dis_all):
idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
dis_min = dis_all[idx_min_list[0]]
pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
return G_min_list, pi_forward_min_list, dis_min
def iteration_proc(G, pi_p_forward, cur_sod):
G_list = [G]
pi_forward_list = [pi_p_forward]
old_sod = cur_sod * 2
sod_list = [cur_sod]
dis_list = [cur_sod]
# iterations.
itr = 0
# @todo: what if difference == 0?
# while itr < ite_max and (np.abs(old_sod - cur_sod) > epsilon or
# np.abs(old_sod - cur_sod) == 0):
while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon:
# while itr < ite_max:
# for itr in range(0, 5): # the convergence condition?
print('itr_iam is', itr)
G_new_list = []
pi_forward_new_list = []
dis_new_list = []
for idx, g in enumerate(G_list):
# label_set = get_node_labels(Gn_median + [g], node_label)
G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph(
g, pi_forward_list[idx])
G_new_list += G_tmp_list
pi_forward_new_list += pi_forward_tmp_list
dis_new_list += dis_tmp_list
# @todo: need to remove duplicates here?
G_list = [ggg.copy() for ggg in G_new_list]
pi_forward_list = [pitem.copy() for pitem in pi_forward_new_list]
dis_list = dis_new_list[:]
old_sod = cur_sod
cur_sod = np.min(dis_list)
sod_list.append(cur_sod)
itr += 1
# @todo: do we return all graphs or the best ones?
# get the best ones of the generated graphs.
G_list, pi_forward_list, dis_min = best_median_graphs(
G_list, pi_forward_list, dis_list)
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_list, idx_list = remove_duplicates(G_list)
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
# dis_list = [dis_list[idx] for idx in idx_list]
# import matplotlib.pyplot as plt
# for g in G_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
print('\nsods:', sod_list, '\n')
return G_list, pi_forward_list, dis_min, sod_list
def remove_duplicates(Gn):
"""Remove duplicate graphs from list.
"""
Gn_new = []
idx_list = []
for idx, g in enumerate(Gn):
dupl = False
for g_new in Gn_new:
if graph_isIdentical(g_new, g):
dupl = True
break
if not dupl:
Gn_new.append(g)
idx_list.append(idx)
return Gn_new, idx_list
def remove_disconnected(Gn):
"""Remove disconnected graphs from list.
"""
Gn_new = []
idx_list = []
for idx, g in enumerate(Gn):
if nx.is_connected(g):
Gn_new.append(g)
idx_list.append(idx)
return Gn_new, idx_list

###########################################################################
# phase 1: initilize.
# compute set-median.
dis_min = np.inf
dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median,
params_ged=params_ged, parallel=True)
print('finish computing GEDs.')
# find all smallest distances.
if allBestInit: # try all best init graphs.
idx_min_list = range(len(dis_list))
dis_min = dis_list
else:
idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
dis_min = [dis_list[idx_min_list[0]]] * len(idx_min_list)
idx_min_rdm = random.randint(0, len(idx_min_list) - 1)
idx_min_list = [idx_min_list[idx_min_rdm]]
sod_set_median = np.min(dis_min)
# phase 2: iteration.
G_list = []
dis_list = []
pi_forward_list = []
G_set_median_list = []
# sod_list = []
for idx_tmp, idx_min in enumerate(idx_min_list):
# print('idx_min is', idx_min)
G = Gn_candidate[idx_min].copy()
G_set_median_list.append(G.copy())
# list of edit operations.
pi_p_forward = pi_forward_all[idx_min]
# pi_p_backward = pi_all_backward[idx_min]
Gi_list, pi_i_forward_list, dis_i_min, sod_list = iteration_proc(G,
pi_p_forward, dis_min[idx_tmp])
G_list += Gi_list
dis_list += [dis_i_min] * len(Gi_list)
pi_forward_list += pi_i_forward_list
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_list, idx_list = remove_duplicates(G_list)
dis_list = [dis_list[idx] for idx in idx_list]
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
if connected == True:
G_list_con, idx_list = remove_disconnected(G_list)
# if there is no connected graphs at all, then remain the disconnected ones.
if len(G_list_con) > 0: # @todo: ??????????????????????????
G_list = G_list_con
dis_list = [dis_list[idx] for idx in idx_list]
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]

# import matplotlib.pyplot as plt
# for g in G_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# get the best median graphs
G_gen_median_list, pi_forward_min_list, sod_gen_median = best_median_graphs(
G_list, pi_forward_list, dis_list)
# for g in G_gen_median_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
if not allBestOutput:
# randomly choose one graph.
idx_rdm = random.randint(0, len(G_gen_median_list) - 1)
G_gen_median_list = [G_gen_median_list[idx_rdm]]
return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median


def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
dataset='monoterpenoides',
graph_dir=''):
"""Compute the iam by c++ implementation (gedlib) through bash.
"""
import os
import time

def createCollectionFile(Gn_names, y, filename):
"""Create collection file.
"""
dirname_ds = os.path.dirname(filename)
if dirname_ds != '':
dirname_ds += '/'
if not os.path.exists(dirname_ds) :
os.makedirs(dirname_ds)
with open(filename + '.xml', 'w') as fgroup:
fgroup.write("<?xml version=\"1.0\"?>")
fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">")
fgroup.write("\n<GraphCollection>")
for idx, fname in enumerate(Gn_names):
fgroup.write("\n\t<graph file=\"" + fname + "\" class=\"" + str(y[idx]) + "\"/>")
fgroup.write("\n</GraphCollection>")
fgroup.close()

tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection)
# fn_collection = tmp_dir + 'collection_for_debug'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl'
# if dataset == 'Letter-high' or dataset == 'Fingerprint':
# dataset = 'letter'
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n'
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
command += 'export LD_LIBRARY_PATH\n'
command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
+ ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' '
if edit_cost_constant is None:
command += 'None'
else:
for ec in edit_cost_constant:
command += str(ec) + ' '
# output = os.system(command)
stream = os.popen(command)

output = stream.readlines()
# print(output)
sod_sm = float(output[0].strip())
sod_gm = float(output[1].strip())
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
return sod_sm, sod_gm, fname_sm, fname_gm



###############################################################################
# Old implementations.
def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
connected=True):
"""See my name, then you know what I do.
"""
# Gn = Gn[0:10]
Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
# phase 1: initilize.
# compute set-median.
dis_min = np.inf
pi_p = []
pi_all = []
for idx1, G_p in enumerate(Gn):
dist_sum = 0
pi_all.append([])
for idx2, G_p_prime in enumerate(Gn):
dist_tmp, pi_tmp, _ = GED(G_p, G_p_prime)
pi_all[idx1].append(pi_tmp)
dist_sum += dist_tmp
if dist_sum < dis_min:
dis_min = dist_sum
G = G_p.copy()
idx_min = idx1
# list of edit operations.
pi_p = pi_all[idx_min]
# phase 2: iteration.
ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'],
edge_label=edge_label)
for itr in range(0, 10): # @todo: the convergence condition?
G_new = G.copy()
# update vertex labels.
# pre-compute h_i0 for each label.
# for label in get_node_labels(Gn, node_label):
# print(label)
# for nd in G.nodes(data=True):
# pass
if not ds_attrs['node_attr_dim']: # labels are symbolic
for nd, _ in G.nodes(data=True):
h_i0_list = []
label_list = []
for label in get_node_labels(Gn, node_label):
h_i0 = 0
for idx, g in enumerate(Gn):
pi_i = pi_p[idx][nd]
if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
h_i0 += 1
h_i0_list.append(h_i0)
label_list.append(label)
# choose one of the best randomly.
idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
idx_rdm = random.randint(0, len(idx_max) - 1)
G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
else: # labels are non-symbolic
for nd, _ in G.nodes(data=True):
Si_norm = 0
phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
for idx, g in enumerate(Gn):
pi_i = pi_p[idx][nd]
if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
Si_norm += 1
phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
phi_i_bar /= Si_norm
G_new.nodes[nd]['attributes'] = phi_i_bar
# update edge labels and adjacency matrix.
if ds_attrs['edge_labeled']:
for nd1, nd2, _ in G.edges(data=True):
h_ij0_list = []
label_list = []
for label in get_edge_labels(Gn, edge_label):
h_ij0 = 0
for idx, g in enumerate(Gn):
pi_i = pi_p[idx][nd1]
pi_j = pi_p[idx][nd2]
h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
g.has_edge(pi_i, pi_j) and
g.edges[pi_i, pi_j][edge_label] == label)
h_ij0 += h_ij0_p
h_ij0_list.append(h_ij0)
label_list.append(label)
# choose one of the best randomly.
idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
h_ij0_max = h_ij0_list[idx_max[0]]
idx_rdm = random.randint(0, len(idx_max) - 1)
best_label = label_list[idx_max[idx_rdm]]
# check whether a_ij is 0 or 1.
sij_norm = 0
for idx, g in enumerate(Gn):
pi_i = pi_p[idx][nd1]
pi_j = pi_p[idx][nd2]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
if not G_new.has_edge(nd1, nd2):
G_new.add_edge(nd1, nd2)
G_new.edges[nd1, nd2][edge_label] = best_label
else:
if G_new.has_edge(nd1, nd2):
G_new.remove_edge(nd1, nd2)
else: # if edges are unlabeled
for nd1, nd2, _ in G.edges(data=True):
sij_norm = 0
for idx, g in enumerate(Gn):
pi_i = pi_p[idx][nd1]
pi_j = pi_p[idx][nd2]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if sij_norm > len(Gn) * c_er / (c_er + c_ei):
if not G_new.has_edge(nd1, nd2):
G_new.add_edge(nd1, nd2)
else:
if G_new.has_edge(nd1, nd2):
G_new.remove_edge(nd1, nd2)
G = G_new.copy()
# update pi_p
pi_p = []
for idx1, G_p in enumerate(Gn):
dist_tmp, pi_tmp, _ = GED(G, G_p)
pi_p.append(pi_tmp)
return G
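# A minimal, self-contained sketch (not from the original file) of the
# labeled-edge decision rule used in the loop above; the default costs mirror
# the signature of iam() and all names here are illustrative.
def _keep_labeled_edge(h_ij0_max, sij_norm, n_graphs, c_ei=3, c_er=3, c_es=1):
    """Return True iff the median should keep the edge with its best label."""
    return h_ij0_max > n_graphs * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es)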

# --------------------------- These are tests --------------------------------#
def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1,
node_label='atom', edge_label='bond_type'):
"""See my name, then you know what I do.
"""
# Gn = Gn[0:10]
Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
# phase 1: initialize.
# compute set-median.
dis_min = np.inf
# pi_p = []
pi_all_forward = []
pi_all_backward = []
for idx1, G_p in tqdm(enumerate(G_candidate), desc='computing GEDs', file=sys.stdout):
dist_sum = 0
pi_all_forward.append([])
pi_all_backward.append([])
for idx2, G_p_prime in enumerate(Gn):
dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_p, G_p_prime)
pi_all_forward[idx1].append(pi_tmp_forward)
pi_all_backward[idx1].append(pi_tmp_backward)
dist_sum += dist_tmp
if dist_sum <= dis_min:
dis_min = dist_sum
G = G_p.copy()
idx_min = idx1
# list of edit operations.
pi_p_forward = pi_all_forward[idx_min]
pi_p_backward = pi_all_backward[idx_min]
# phase 2: iteration.
ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
edge_label=edge_label)
label_set = get_node_labels(Gn + [G], node_label)
for itr in range(0, 10): # @todo: the convergence condition?
G_new = G.copy()
# update vertex labels.
# pre-compute h_i0 for each label.
# for label in get_node_labels(Gn, node_label):
# print(label)
# for nd in G.nodes(data=True):
# pass
if not ds_attrs['node_attr_dim']: # labels are symbolic
for nd in G.nodes():
h_i0_list = []
label_list = []
for label in label_set:
h_i0 = 0
for idx, g in enumerate(Gn):
pi_i = pi_p_forward[idx][nd]
if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
h_i0 += 1
h_i0_list.append(h_i0)
label_list.append(label)
# choose one of the best randomly.
idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
idx_rdm = random.randint(0, len(idx_max) - 1)
G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
else: # labels are non-symbolic
for nd in G.nodes():
Si_norm = 0
phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
for idx, g in enumerate(Gn):
pi_i = pi_p_forward[idx][nd]
if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
Si_norm += 1
phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
phi_i_bar /= Si_norm
G_new.nodes[nd]['attributes'] = phi_i_bar
# update edge labels and adjacency matrix.
if ds_attrs['edge_labeled']:
for nd1, nd2, _ in G.edges(data=True):
h_ij0_list = []
label_list = []
for label in get_edge_labels(Gn, edge_label):
h_ij0 = 0
for idx, g in enumerate(Gn):
pi_i = pi_p_forward[idx][nd1]
pi_j = pi_p_forward[idx][nd2]
h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
g.has_edge(pi_i, pi_j) and
g.edges[pi_i, pi_j][edge_label] == label)
h_ij0 += h_ij0_p
h_ij0_list.append(h_ij0)
label_list.append(label)
# choose one of the best randomly.
idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
h_ij0_max = h_ij0_list[idx_max[0]]
idx_rdm = random.randint(0, len(idx_max) - 1)
best_label = label_list[idx_max[idx_rdm]]
# check whether a_ij is 0 or 1.
sij_norm = 0
for idx, g in enumerate(Gn):
pi_i = pi_p_forward[idx][nd1]
pi_j = pi_p_forward[idx][nd2]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
if not G_new.has_edge(nd1, nd2):
G_new.add_edge(nd1, nd2)
G_new.edges[nd1, nd2][edge_label] = best_label
else:
if G_new.has_edge(nd1, nd2):
G_new.remove_edge(nd1, nd2)
else: # if edges are unlabeled
# @todo: works only for undirected graphs.
for nd1 in range(nx.number_of_nodes(G)):
for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
sij_norm = 0
for idx, g in enumerate(Gn):
pi_i = pi_p_forward[idx][nd1]
pi_j = pi_p_forward[idx][nd2]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if sij_norm > len(Gn) * c_er / (c_er + c_ei):
if not G_new.has_edge(nd1, nd2):
G_new.add_edge(nd1, nd2)
elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
if G_new.has_edge(nd1, nd2):
G_new.remove_edge(nd1, nd2)
# do not change anything when equal.
G = G_new.copy()
# update pi_p
pi_p_forward = []
for G_p in Gn:
dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
pi_p_forward.append(pi_tmp_forward)
return G
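# Sketch (illustrative, not from the original file) of the unlabeled-edge
# update above: with threshold t = n_graphs * c_er / (c_er + c_ei), an edge
# is added when s_ij > t, removed when s_ij < t, and kept as-is when equal.
def _unlabeled_edge_action(sij_norm, n_graphs, c_ei=3, c_er=3):
    threshold = n_graphs * c_er / (c_er + c_ei)
    if sij_norm > threshold:
        return 'add'
    elif sij_norm < threshold:
        return 'remove'
    return 'keep'  # do not change anything when equal.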


###############################################################################

if __name__ == '__main__':
from gklearn.utils.graphfiles import loadDataset
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
# 'extra_params': {}} # node nsymb
# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
# 'extra_params': {}}
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

iam(Gn)

+ 0
- 114
gklearn/preimage/knn.py View File

@@ -1,114 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 10 13:22:04 2020

@author: ljia
"""
import numpy as np
#import matplotlib.pyplot as plt
from tqdm import tqdm
import random
#import csv
from shutil import copyfile
import os
import sys

from gklearn.preimage.iam import iam_bash
from gklearn.utils.graphfiles import loadDataset, loadGXL
from gklearn.preimage.ged import GED
from gklearn.preimage.utils import get_same_item_indices

def test_knn():
ds = {'name': 'monoterpenoides',
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
# gkernel = 'treeletkernel'
# node_label = 'atom'
# edge_label = 'bond_type'
# ds_name = 'mono'
dir_output = 'results/knn/'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'
k_nn = 1
percent = 0.1
repeats = 50
edit_cost_constant = [3, 3, 1, 3, 3, 1]
# get indices by classes.
y_idx = get_same_item_indices(y_all)
# accumulated over all repeats, for the means computed after the loop.
accuracy_sm_list = []
accuracy_gm_list = []
for repeat in range(0, repeats):
print('\n---------------------------------')
print('repeat =', repeat)
sod_sm_list = []
sod_gm_list = []
random.seed(repeat)
set_median_list = []
gen_median_list = []
train_y_set = []
for y, values in y_idx.items():
print('\ny =', y)
size_median_set = int(len(values) * percent)
median_set_idx = random.sample(values, size_median_set)
print('median set: ', median_set_idx)
# compute set median and gen median using IAM (C++ through bash).
# Gn_median = [Gn[idx] for idx in median_set_idx]
group_fnames = [Gn[g].graph['filename'] for g in median_set_idx]
sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant,
graph_dir=graph_dir)
print('sod_sm, sod_gm:', sod_sm, sod_gm)
sod_sm_list.append(sod_sm)
sod_gm_list.append(sod_gm)
fname_sm_new = dir_output + 'medians/set_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
copyfile(fname_sm, fname_sm_new)
fname_gm_new = dir_output + 'medians/gen_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
copyfile(fname_gm, fname_gm_new)
set_median_list.append(loadGXL(fname_sm_new))
gen_median_list.append(loadGXL(fname_gm_new))
train_y_set.append(int(y))
print(sod_sm, sod_gm)
# do 1-nn.
test_y_set = [int(y) for y in y_all]
accuracy_sm = knn(set_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
accuracy_gm = knn(gen_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
accuracy_sm_list.append(accuracy_sm)
accuracy_gm_list.append(accuracy_gm)
print('current accuracy sm and gm:', accuracy_sm, accuracy_gm)
# output
accuracy_sm_mean = np.mean(accuracy_sm_list)
accuracy_gm_mean = np.mean(accuracy_gm_list)
print('\ntotal average accuracy sm and gm:', accuracy_sm_mean, accuracy_gm_mean)

def knn(train_set, train_y_set, test_set, test_y_set, k=1, distance='ged'):
if k == 1 and distance == 'ged':
algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
'algo_options': algo_options, 'stabilizer': None}
accuracy = 0
for idx_test, g_test in tqdm(enumerate(test_set), desc='computing 1-nn',
file=sys.stdout):
dis = np.inf
for idx_train, g_train in enumerate(train_set):
dis_cur, _, _ = GED(g_test, g_train, **params_ged)
if dis_cur < dis:
dis = dis_cur
test_y_cur = train_y_set[idx_train]
if test_y_cur == test_y_set[idx_test]:
accuracy += 1
accuracy = accuracy / len(test_set)
return accuracy
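# A dependency-free 1-nn sketch equivalent to the loop above, with a pluggable
# distance in place of GED; all names here are invented for illustration.
def _nn_label(train_set, train_y_set, test_item, dist):
    d_best, y_best = np.inf, None
    for item, y in zip(train_set, train_y_set):
        d = dist(test_item, item)
        if d < d_best:
            d_best, y_best = d, y
    return y_best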


if __name__ == '__main__':
test_knn()

+ 0
- 6
gklearn/preimage/libs.py View File

@@ -1,6 +0,0 @@
import sys
import pathlib

# insert gedlibpy library.
sys.path.insert(0, "../../../")
from gedlibpy import librariesImport, gedlibpy

+ 0
- 218
gklearn/preimage/median.py View File

@@ -1,218 +0,0 @@
import sys
sys.path.insert(0, "../")
#import pathlib
import numpy as np
import networkx as nx
import time
from gedlibpy import librariesImport, gedlibpy
#import script
sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
import gklearn
from gklearn.utils.graphfiles import loadDataset
def replace_graph_in_env(script, graph, old_id, label='median'):
"""
Replace a graph in script.
If old_id is -1, add a new graph to the environment.
"""
if(old_id > -1):
script.PyClearGraph(old_id)
new_id = script.PyAddGraph(label)
for i in graph.nodes():
script.PyAddNode(new_id,str(i),graph.node[i]) # !! strings are required by gedlib
for e in graph.edges:
script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
script.PyInitEnv()
script.PySetMethod("IPFP", "")
script.PyInitMethod()
return new_id
# Draw the current median.
def draw_Letter_graph(graph, savepath=''):
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
plt.figure()
pos = {}
for n in graph.nodes:
pos[n] = np.array([float(graph.node[n]['attributes'][0]),
float(graph.node[n]['attributes'][1])])
nx.draw_networkx(graph, pos)
if savepath != '':
plt.savefig(savepath + str(time.time()) + '.eps', format='eps', dpi=300)
plt.show()
plt.clf()
#compute new mappings
def update_mappings(script,median_id,listID):
med_distances = {}
med_mappings = {}
sod = 0
for i in range(0,len(listID)):
script.PyRunMethod(median_id,listID[i])
med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
sod += med_distances[i]
return med_distances, med_mappings, sod
def calcul_Sij(all_mappings, all_graphs,i,j):
s_ij = 0
for k in range(0,len(all_mappings)):
cur_graph = all_graphs[k]
cur_mapping = all_mappings[k]
size_graph = cur_graph.order()
if ((cur_mapping[i] < size_graph) and
(cur_mapping[j] < size_graph) and
(cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
s_ij += 1
return s_ij
# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
# from scipy.stats.mstats import gmean
# for i in median.nodes():
# for k in listIdSet:
# vectors = [] #np.zeros((len(listIdSet),2))
# if(k != median_id):
# phi_i = mappings[k][i]
# if(phi_i < dataset[k].order()):
# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
# new_labels = gmean(vectors)
# median.node[i]['x'] = str(new_labels[0])
# median.node[i]['y'] = str(new_labels[1])
# return median
def update_median_nodes(median,dataset,mappings):
#update node attributes
for i in median.nodes():
nb_sub=0
mean_label = {'x' : 0, 'y' : 0}
for k in range(0,len(mappings)):
phi_i = mappings[k][i]
if ( phi_i < dataset[k].order() ):
nb_sub += 1
mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
return median
def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
#for letter high, ceir = 1.7, alpha = 0.75
size_dataset = len(dataset)
ratio_cei_cer = cer/(cei + cer)
threshold = size_dataset*ratio_cei_cer
order_graph_median = median.order()
for i in range(0,order_graph_median):
for j in range(i+1,order_graph_median):
s_ij = calcul_Sij(mappings,dataset,i,j)
if(s_ij > threshold):
median.add_edge(i,j)
else:
if(median.has_edge(i,j)):
median.remove_edge(i,j)
return median
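# Worked example (sketch): with the defaults cei = cer = 0.425 the threshold
# equals size_dataset * cer / (cei + cer) = 0.5 * size_dataset, so an edge
# (i, j) is kept in the median iff it is mapped onto an edge in a strict
# majority of the graphs.
def _edge_threshold(size_dataset, cei=0.425, cer=0.425):
    return size_dataset * cer / (cei + cer)
# e.g. _edge_threshold(10) == 5.0: keep (i, j) iff s_ij > 5 of 10 graphs.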
def compute_median(script, listID, dataset,verbose=False):
"""Compute a graph median of a dataset according to an environment
Parameters
script : a gedlib-initialized environment
listID (list): a list of ID in script: encodes the dataset
dataset (list): corresponding graphs in networkX format. We assume that graph
listID[i] corresponds to dataset[i]
Returns:
A networkX graph, which is the median, with corresponding sod
"""
print(len(listID))
median_set_index, median_set_sod = compute_median_set(script, listID)
print(median_set_index)
print(median_set_sod)
sods = []
# Add the median to the environment.
set_median = dataset[median_set_index].copy()
median = dataset[median_set_index].copy()
cur_med_id = replace_graph_in_env(script,median,-1)
med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
sods.append(cur_sod)
if(verbose):
print(cur_sod)
ite_max = 50
old_sod = cur_sod * 2
ite = 0
epsilon = 0.001
while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon)):
old_sod = cur_sod
median = update_median_nodes(median,dataset, med_mappings)
median = update_median_edges(dataset,med_mappings,median)
cur_med_id = replace_graph_in_env(script,median,cur_med_id)
med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
sods.append(cur_sod)
if(verbose):
print(cur_sod)
ite += 1
return median, cur_sod, sods, set_median
def compute_median_set(script,listID):
'Return the index (in listID order) of the set median, along with its SOD.'
# compute the set median.
N=len(listID)
map_id_to_index = {}
map_index_to_id = {}
for i in range(0,len(listID)):
map_id_to_index[listID[i]] = i
map_index_to_id[i] = listID[i]
distances = np.zeros((N,N))
for i in listID:
for j in listID:
script.PyRunMethod(i,j)
distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
median_set_index = np.argmin(np.sum(distances,0))
sod = np.min(np.sum(distances,0))
return median_set_index, sod
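# Equivalent NumPy sketch (illustrative) of the set-median rule above: the set
# median minimizes the sum of distances to all other graphs in the matrix.
def _set_median_from_distances(distances):
    sums = np.sum(distances, 0)
    return int(np.argmin(sums)), float(np.min(sums))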
if __name__ == "__main__":
# load the dataset.
gedlibpy.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
gedlibpy.PySetEditCost("LETTER")
gedlibpy.PyInitEnv()
gedlibpy.PySetMethod("IPFP", "")
gedlibpy.PyInitMethod()
dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
listID = gedlibpy.PyGetAllGraphIds()
median, cur_sod, sods, set_median = compute_median(gedlibpy,listID,dataset,verbose=True)
print(cur_sod)
draw_Letter_graph(median)
#if __name__ == '__main__':
# # test draw_Letter_graph
# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
# 'extra_params': {}} # node nsymb
# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# print(y_all)
# for g in Gn:
# draw_Letter_graph(g)

+ 0
- 201
gklearn/preimage/median_benoit.py View File

@@ -1,201 +0,0 @@
import sys
import pathlib
import numpy as np
import networkx as nx
import librariesImport
import script
sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
import gklearn
def replace_graph_in_env(script, graph, old_id, label='median'):
"""
Replace a graph in script.
If old_id is -1, add a new graph to the environment.
"""
if(old_id > -1):
script.PyClearGraph(old_id)
new_id = script.PyAddGraph(label)
for i in graph.nodes():
script.PyAddNode(new_id,str(i),graph.node[i]) # !! strings are required by gedlib
for e in graph.edges:
script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
script.PyInitEnv()
script.PySetMethod("IPFP", "")
script.PyInitMethod()
return new_id
# Draw the current median.
def draw_Letter_graph(graph):
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
plt.figure()
pos = {}
for n in graph.nodes:
pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
nx.draw_networkx(graph,pos)
plt.show()
#compute new mappings
def update_mappings(script,median_id,listID):
med_distances = {}
med_mappings = {}
sod = 0
for i in range(0,len(listID)):
script.PyRunMethod(median_id,listID[i])
med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
sod += med_distances[i]
return med_distances, med_mappings, sod
def calcul_Sij(all_mappings, all_graphs,i,j):
s_ij = 0
for k in range(0,len(all_mappings)):
cur_graph = all_graphs[k]
cur_mapping = all_mappings[k]
size_graph = cur_graph.order()
if ((cur_mapping[i] < size_graph) and
(cur_mapping[j] < size_graph) and
(cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
s_ij += 1
return s_ij
# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
# from scipy.stats.mstats import gmean
# for i in median.nodes():
# for k in listIdSet:
# vectors = [] #np.zeros((len(listIdSet),2))
# if(k != median_id):
# phi_i = mappings[k][i]
# if(phi_i < dataset[k].order()):
# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
# new_labels = gmean(vectors)
# median.node[i]['x'] = str(new_labels[0])
# median.node[i]['y'] = str(new_labels[1])
# return median
def update_median_nodes(median,dataset,mappings):
#update node attributes
for i in median.nodes():
nb_sub=0
mean_label = {'x' : 0, 'y' : 0}
for k in range(0,len(mappings)):
phi_i = mappings[k][i]
if ( phi_i < dataset[k].order() ):
nb_sub += 1
mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
return median
def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
#for letter high, ceir = 1.7, alpha = 0.75
size_dataset = len(dataset)
ratio_cei_cer = cer/(cei + cer)
threshold = size_dataset*ratio_cei_cer
order_graph_median = median.order()
for i in range(0,order_graph_median):
for j in range(i+1,order_graph_median):
s_ij = calcul_Sij(mappings,dataset,i,j)
if(s_ij > threshold):
median.add_edge(i,j)
else:
if(median.has_edge(i,j)):
median.remove_edge(i,j)
return median
def compute_median(script, listID, dataset,verbose=False):
"""Compute a graph median of a dataset according to an environment
Parameters
script : a gedlib-initialized environment
listID (list): a list of ID in script: encodes the dataset
dataset (list): corresponding graphs in networkX format. We assume that graph
listID[i] corresponds to dataset[i]
Returns:
A networkX graph, which is the median, with corresponding sod
"""
print(len(listID))
median_set_index, median_set_sod = compute_median_set(script, listID)
print(median_set_index)
print(median_set_sod)
sods = []
# Add the median to the environment.
set_median = dataset[median_set_index].copy()
median = dataset[median_set_index].copy()
cur_med_id = replace_graph_in_env(script,median,-1)
med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
sods.append(cur_sod)
if(verbose):
print(cur_sod)
ite_max = 50
old_sod = cur_sod * 2
ite = 0
epsilon = 0.001
while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon)):
old_sod = cur_sod
median = update_median_nodes(median,dataset, med_mappings)
median = update_median_edges(dataset,med_mappings,median)
cur_med_id = replace_graph_in_env(script,median,cur_med_id)
med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
sods.append(cur_sod)
if(verbose):
print(cur_sod)
ite += 1
return median, cur_sod, sods, set_median
def compute_median_set(script,listID):
'Return the index (in listID order) of the set median, along with its SOD.'
# compute the set median.
N=len(listID)
map_id_to_index = {}
map_index_to_id = {}
for i in range(0,len(listID)):
map_id_to_index[listID[i]] = i
map_index_to_id[i] = listID[i]
distances = np.zeros((N,N))
for i in listID:
for j in listID:
script.PyRunMethod(i,j)
distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
median_set_index = np.argmin(np.sum(distances,0))
sod = np.min(np.sum(distances,0))
return median_set_index, sod
if __name__ == "__main__":
# load the dataset.
script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
script.PySetEditCost("LETTER")
script.PyInitEnv()
script.PySetMethod("IPFP", "")
script.PyInitMethod()
dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
listID = script.PyGetAllGraphIds()
median, cur_sod, sods, set_median = compute_median(script,listID,dataset,verbose=True)
print(cur_sod)
draw_Letter_graph(median)

+ 0
- 215
gklearn/preimage/median_linlin.py View File

@@ -1,215 +0,0 @@
import sys
import pathlib
import numpy as np
import networkx as nx
from gedlibpy import librariesImport, gedlibpy
sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
import gklearn
def replace_graph_in_env(script, graph, old_id, label='median'):
"""
Replace a graph in script.
If old_id is -1, add a new graph to the environment.
"""
if(old_id > -1):
script.PyClearGraph(old_id)
new_id = script.PyAddGraph(label)
for i in graph.nodes():
script.PyAddNode(new_id,str(i),graph.node[i]) # !! strings are required by gedlib
for e in graph.edges:
script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
script.PyInitEnv()
script.PySetMethod("IPFP", "")
script.PyInitMethod()
return new_id
# Draw the current median.
def draw_Letter_graph(graph):
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
plt.figure()
pos = {}
for n in graph.nodes:
pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
nx.draw_networkx(graph,pos)
plt.show()
#compute new mappings
def update_mappings(script,median_id,listID):
med_distances = {}
med_mappings = {}
sod = 0
for i in range(0,len(listID)):
script.PyRunMethod(median_id,listID[i])
med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
sod += med_distances[i]
return med_distances, med_mappings, sod
def calcul_Sij(all_mappings, all_graphs,i,j):
s_ij = 0
for k in range(0,len(all_mappings)):
cur_graph = all_graphs[k]
cur_mapping = all_mappings[k]
size_graph = cur_graph.order()
if ((cur_mapping[i] < size_graph) and
(cur_mapping[j] < size_graph) and
(cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
s_ij += 1
return s_ij
# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
# from scipy.stats.mstats import gmean
# for i in median.nodes():
# for k in listIdSet:
# vectors = [] #np.zeros((len(listIdSet),2))
# if(k != median_id):
# phi_i = mappings[k][i]
# if(phi_i < dataset[k].order()):
# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
# new_labels = gmean(vectors)
# median.node[i]['x'] = str(new_labels[0])
# median.node[i]['y'] = str(new_labels[1])
# return median
def update_median_nodes(median,dataset,mappings):
#update node attributes
for i in median.nodes():
nb_sub=0
mean_label = {'x' : 0, 'y' : 0}
for k in range(0,len(mappings)):
phi_i = mappings[k][i]
if ( phi_i < dataset[k].order() ):
nb_sub += 1
mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
return median
def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
#for letter high, ceir = 1.7, alpha = 0.75
size_dataset = len(dataset)
ratio_cei_cer = cer/(cei + cer)
threshold = size_dataset*ratio_cei_cer
order_graph_median = median.order()
for i in range(0,order_graph_median):
for j in range(i+1,order_graph_median):
s_ij = calcul_Sij(mappings,dataset,i,j)
if(s_ij > threshold):
median.add_edge(i,j)
else:
if(median.has_edge(i,j)):
median.remove_edge(i,j)
return median
def compute_median(script, listID, dataset,verbose=False):
"""Compute a graph median of a dataset according to an environment
Parameters
script : a gedlib-initialized environment
listID (list): a list of ID in script: encodes the dataset
dataset (list): corresponding graphs in networkX format. We assume that graph
listID[i] corresponds to dataset[i]
Returns:
A networkX graph, which is the median, with corresponding sod
"""
print(len(listID))
median_set_index, median_set_sod = compute_median_set(script, listID)
print(median_set_index)
print(median_set_sod)
sods = []
# Add the median to the environment.
set_median = dataset[median_set_index].copy()
median = dataset[median_set_index].copy()
cur_med_id = replace_graph_in_env(script,median,-1)
med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
sods.append(cur_sod)
if(verbose):
print(cur_sod)
ite_max = 50
old_sod = cur_sod * 2
ite = 0
epsilon = 0.001
while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon)):
old_sod = cur_sod
median = update_median_nodes(median,dataset, med_mappings)
median = update_median_edges(dataset,med_mappings,median)
cur_med_id = replace_graph_in_env(script,median,cur_med_id)
med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
sods.append(cur_sod)
if(verbose):
print(cur_sod)
ite += 1
return median, cur_sod, sods, set_median
def compute_median_set(script,listID):
'Return the index (in listID order) of the set median, along with its SOD.'
# compute the set median.
N=len(listID)
map_id_to_index = {}
map_index_to_id = {}
for i in range(0,len(listID)):
map_id_to_index[listID[i]] = i
map_index_to_id[i] = listID[i]
distances = np.zeros((N,N))
for i in listID:
for j in listID:
script.PyRunMethod(i,j)
distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
median_set_index = np.argmin(np.sum(distances,0))
sod = np.min(np.sum(distances,0))
return median_set_index, sod
def _convertGraph(G):
"""Convert a graph to the proper NetworkX format that can be
recognized by library gedlibpy.
"""
G_new = nx.Graph()
for nd, attrs in G.nodes(data=True):
G_new.add_node(str(nd), chem=attrs['atom'])
# G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
# y=str(attrs['attributes'][1]))
for nd1, nd2, attrs in G.edges(data=True):
G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
# G_new.add_edge(str(nd1), str(nd2))
return G_new
if __name__ == "__main__":
# load the dataset.
gedlibpy.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
gedlibpy.PySetEditCost("LETTER")
gedlibpy.PyInitEnv()
gedlibpy.PySetMethod("IPFP", "")
gedlibpy.PyInitMethod()
dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
listID = gedlibpy.PyGetAllGraphIds()
median, cur_sod, sods, set_median = compute_median(gedlibpy,listID,dataset,verbose=True)
print(cur_sod)
draw_Letter_graph(median)

+ 0
- 201
gklearn/preimage/pathfrequency.py View File

@@ -1,201 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 20 10:12:15 2019

Inferring a graph from path frequency.
@author: ljia
"""
#import numpy as np
import networkx as nx
from scipy.spatial.distance import hamming
import itertools

def SISF(K, v):
# @todo: not implemented yet.
raise NotImplementedError('SISF is not implemented yet.')

def SISF_M(K, v):
# @todo: not implemented yet.
raise NotImplementedError('SISF_M is not implemented yet.')


def GIPF_tree(v_obj, K=1, alphabet=[0, 1]):
if K == 1:
n_graph = v_obj[0] + v_obj[1]
D_T, father_idx = getDynamicTable(n_graph, alphabet)
# get the vector the closest to v_obj.
if v_obj not in D_T:
print('no exact solution')
dis_lim = 1 / len(v_obj) # the possible shortest distance.
dis_min = 1.0 # minimum proportional distance
v_min = v_obj
for vc in D_T:
if vc[0] + vc[1] == n_graph:
# print(vc)
dis = hamming(vc, v_obj)
if dis < dis_min:
dis_min = dis
v_min = vc
if dis_min <= dis_lim:
break
v_obj = v_min
# obtain required graph by traceback procedure.
return getObjectGraph(v_obj, D_T, father_idx, alphabet), v_obj
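# Note on the tolerance above (a sketch, not from the original file): scipy's
# hamming() returns the *proportion* of differing entries, so the smallest
# nonzero distance between two length-m vectors is 1/m -- hence
# dis_lim = 1 / len(v_obj).
_example_dis = hamming((1, 0, 0, 0, 0, 0), (1, 1, 0, 0, 0, 0))  # == 1/6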
def GIPF_M(K, v):
# @todo: not implemented yet.
raise NotImplementedError('GIPF_M is not implemented yet.')


def getDynamicTable(n_graph, alphabet=[0, 1]):
# init. When only one node exists.
D_T = [(1, 0, 0, 0, 0, 0), (0, 1, 0, 0, 0, 0)]
father_idx = [-1, -1] # index of each vector's father
# add possible vectors.
for idx, v in enumerate(D_T):
if v[0] + v[1] < n_graph:
D_T.append((v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5]))
D_T.append((v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5]))
D_T.append((v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5]))
D_T.append((v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2))
father_idx += [idx, idx, idx, idx]
# D_T = itertools.chain([(1, 0, 0, 0, 0, 0)], [(0, 1, 0, 0, 0, 0)])
# father_idx = itertools.chain([-1], [-1]) # index of each vector's father
# # add possible vectors.
# for idx, v in enumerate(D_T):
# if v[0] + v[1] < n_graph:
# D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5])])
# D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5])])
# D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5])])
# D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2)])
# father_idx = itertools.chain(father_idx, [idx, idx, idx, idx])
return D_T, father_idx
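# Illustrative reading of the table entries (an interpretation inferred from
# the four transfers above, not stated in the original): a vector
# (n0, n1, n00, n01, n10, n11) counts nodes carrying each label and ordered
# label pairs over edges. For example:
_dt_example, _father_example = getDynamicTable(2)
# _dt_example contains (1, 1, 0, 1, 1, 0): one 0-node, one 1-node, and one
# 0-1 edge counted in both directions.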


def getObjectGraph(v_obj, D_T, father_idx, alphabet=[0, 1]):
g_obj = nx.Graph()
# do vector traceback.
v_tb = [list(v_obj)] # traceback vectors.
v_tb_idx = [D_T.index(v_obj)] # indices of traceback vectors.
while v_tb_idx[-1] > 1:
idx_pre = father_idx[v_tb_idx[-1]]
v_tb_idx.append(idx_pre)
v_tb.append(list(D_T[idx_pre]))
v_tb = v_tb[::-1] # reverse
# v_tb_idx = v_tb_idx[::-1]

# construct tree.
v_c = v_tb[0] # current vector.
if v_c[0] == 1:
g_obj.add_node(0, node_label=alphabet[0])
else:
g_obj.add_node(0, node_label=alphabet[1])
for vct in v_tb[1:]:
if vct[0] - v_c[0] == 1:
if vct[2] - v_c[2] == 2: # transfer 1
label1 = alphabet[0]
label2 = alphabet[0]
else: # transfer 2
label1 = alphabet[1]
label2 = alphabet[0]
else:
if vct[3] - v_c[3] == 1: # transfer 3
label1 = alphabet[0]
label2 = alphabet[1]
else: # transfer 4
label1 = alphabet[1]
label2 = alphabet[1]
for nd, attr in g_obj.nodes(data=True):
if attr['node_label'] == label1:
nb_node = nx.number_of_nodes(g_obj)
g_obj.add_node(nb_node, node_label=label2)
g_obj.add_edge(nd, nb_node)
break
v_c = vct
return g_obj


import random
def hierarchy_pos(G, root=None, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5):

'''
From Joel's answer at https://stackoverflow.com/a/29597209/2966723.
Licensed under Creative Commons Attribution-Share Alike

If the graph is a tree this will return the positions to plot this in a
hierarchical layout.

G: the graph (must be a tree)

root: the root node of current branch
- if the tree is directed and this is not given,
the root will be found and used
- if the tree is directed and this is given, then
the positions will be just for the descendants of this node.
- if the tree is undirected and not given,
then a random choice will be used.

width: horizontal space allocated for this branch - avoids overlap with other branches

vert_gap: gap between levels of hierarchy

vert_loc: vertical location of root

xcenter: horizontal location of root
'''
if not nx.is_tree(G):
raise TypeError('cannot use hierarchy_pos on a graph that is not a tree')

if root is None:
if isinstance(G, nx.DiGraph):
root = next(iter(nx.topological_sort(G))) #allows back compatibility with nx version 1.11
else:
root = random.choice(list(G.nodes))

def _hierarchy_pos(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5, pos = None, parent = None):
'''
see hierarchy_pos docstring for most arguments

pos: a dict saying where all nodes go if they have been assigned
parent: parent of this branch. - only affects it if non-directed

'''

if pos is None:
pos = {root:(xcenter,vert_loc)}
else:
pos[root] = (xcenter, vert_loc)
children = list(G.neighbors(root))
if not isinstance(G, nx.DiGraph) and parent is not None:
children.remove(parent)
if len(children)!=0:
dx = width/len(children)
nextx = xcenter - width/2 - dx/2
for child in children:
nextx += dx
pos = _hierarchy_pos(G,child, width = dx, vert_gap = vert_gap,
vert_loc = vert_loc-vert_gap, xcenter=nextx,
pos=pos, parent = root)
return pos


return _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter)


if __name__ == '__main__':
v_obj = (6, 4, 10, 3, 3, 2)
# v_obj = (6, 5, 10, 3, 3, 2)
tree_obj, v_obj = GIPF_tree(v_obj)
print('One closest vector is', v_obj)
# plot
pos = hierarchy_pos(tree_obj, 0)
node_labels = nx.get_node_attributes(tree_obj, 'node_label')
nx.draw(tree_obj, pos=pos, labels=node_labels, with_labels=True)

+ 0
- 705
gklearn/preimage/preimage_iam.py View File

@@ -1,705 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 30 17:07:43 2019

A graph pre-image method combining the iterative pre-image method in reference [1]
and the iterative alternate minimization (IAM) in reference [2].
@author: ljia
@references:
[1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
[2] Generalized median graph via iterative alternate minimization.
"""
import sys
import numpy as np
from tqdm import tqdm
import networkx as nx
import matplotlib.pyplot as plt
import random

from gklearn.preimage.iam import iam_upgraded
from gklearn.preimage.utils import dis_gstar, compute_kernel


def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
gkernel, epsilon=0.001, InitIAMWithAllDk=False,
params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
'ite_max': 50, 'epsilon': 0.001,
'removeNodes': True, 'connected': False},
params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP',
'edit_cost_constant': [], 'stabilizer': 'min',
'repeat': 50}):
"""This function constructs graph pre-image by the iterative pre-image
framework in reference [1], algorithm 1, where the step of generating new
graphs randomly is replaced by the IAM algorithm in reference [2].
notes
-----
Every time a set of n better graphs is acquired, their distances in kernel space are
compared with the k nearest ones, and the k nearest distances from the k+n
distances will be used as the new ones.
"""
# compute k nearest neighbors of phi in DN.
dis_all = [] # distance between g_star and each graph.
term3 = 0
for i1, a1 in enumerate(alpha):
for i2, a2 in enumerate(alpha):
term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
dis_all.append(dtemp)
# sort
sort_idx = np.argsort(dis_all)
dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
if dis_k[0] == 0: # the exact pre-image.
print('The exact pre-image is found from the input dataset.')
return 0, ghat_list, 0, 0, 0 # match the arity of the final return below.
dhat = dis_k[0] # the nearest distance
# for g in ghat_list:
# draw_Letter_graph(g)
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
# for gi in Gk:
# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
## nx.draw_networkx(gi)
# plt.show()
## draw_Letter_graph(g)
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
# i = 1
r = 0
itr_total = 0
dis_of_each_itr = [dhat]
found = False
nb_updated = 0
nb_updated_k = 0
while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
print('Current preimage iteration =', r)
print('Total preimage iteration =', itr_total, '\n')
found = False
Gn_nearest_median = [g.copy() for g in Gk]
if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
ghat_new_list = []
for g_tmp in Gk:
Gn_nearest_init = [g_tmp.copy()]
ghat_new_list_tmp, _, _ = iam_upgraded(Gn_nearest_median,
Gn_nearest_init, params_ged=params_ged, **params_iam)
ghat_new_list += ghat_new_list_tmp
else: # only the best graph in D_k is used to initialize IAM.
Gn_nearest_init = [g.copy() for g in Gk]
ghat_new_list, _, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
params_ged=params_ged, **params_iam)

# for g in g_tmp_list:
# nx.draw_networkx(g)
# plt.show()
# draw_Letter_graph(g)
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute distance between \psi and the new generated graphs.
knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
dhat_new_list = []
for idx, g_tmp in enumerate(ghat_new_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
len(ghat_new_list) + len(Gn_median) + 1),
alpha, knew, withterm3=False))
for idx_g, ghat_new in enumerate(ghat_new_list):
dhat_new = dhat_new_list[idx_g]
# if the new distance is smaller than the max of D_k.
if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
# check if the new distance is the same as one in D_k.
is_duplicate = False
for dis_tmp in dis_k[1:-1]:
if np.abs(dhat_new - dis_tmp) < epsilon:
is_duplicate = True
print('IAM: duplicate k nearest graph generated.')
break
if not is_duplicate:
if np.abs(dhat_new - dhat) < epsilon:
print('IAM: I am equal!')
# dhat = dhat_new
# ghat_list = [ghat_new.copy()]
else:
print('IAM: we got better k nearest neighbors!')
nb_updated_k += 1
print('the k nearest neighbors are updated',
nb_updated_k, 'times.')
dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
sort_idx = np.argsort(dis_k)
dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
Gk = [Gk[idx] for idx in sort_idx[0:k]]
if dhat_new < dhat:
print('IAM: I have smaller distance!')
print(str(dhat) + '->' + str(dhat_new))
dhat = dhat_new
ghat_list = [Gk[0].copy()]
r = 0
nb_updated += 1
print('the graph is updated', nb_updated, 'times.')
nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'),
with_labels=True)
## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
plt.show()
found = True
if not found:
r += 1

dis_of_each_itr.append(dhat)
itr_total += 1
print('\nthe k shortest distances are', dis_k)
print('the shortest distances for previous iterations are', dis_of_each_itr)
print('\n\nthe graph is updated', nb_updated, 'times.')
print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
print('distances in kernel space:', dis_of_each_itr, '\n')
return dhat, ghat_list, dis_of_each_itr[-1], nb_updated, nb_updated_k
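# A minimal sketch of the kernel-space distance used at the top of
# preimage_iam() (cf. dis_gstar); the assumption here is that K is a kernel
# matrix indexed consistently with ig and idx_gi:
# d(g, g*)^2 = K[g, g] - 2 * sum_i alpha_i * K[g, g_i]
#              + sum_{i, j} alpha_i * alpha_j * K[g_i, g_j].
def _dis_gstar_sketch(ig, idx_gi, alpha, K):
    term1 = K[ig, ig]
    term2 = 2 * sum(a * K[ig, i] for a, i in zip(alpha, idx_gi))
    term3 = sum(a1 * a2 * K[i1, i2] for a1, i1 in zip(alpha, idx_gi)
                for a2, i2 in zip(alpha, idx_gi))
    return np.sqrt(term1 - term2 + term3)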




def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
l_max, gkernel, epsilon=0.001,
InitIAMWithAllDk=False, InitRandomWithAllDk=True,
params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
'ite_max': 50, 'epsilon': 0.001,
'removeNodes': True, 'connected': False},
params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1',
'method': 'IPFP', 'edit_cost_constant': [],
'stabilizer': 'min', 'repeat': 50}):
"""This function constructs graph pre-image by the iterative pre-image
framework in reference [1], algorithm 1, where new graphs are generated
randomly and by the IAM algorithm in reference [2].
notes
-----
Every time a set of n better graphs is acquired, their distances in kernel space are
compared with the k nearest ones, and the k nearest distances from the k+n
distances will be used as the new ones.
"""
Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
# compute k nearest neighbors of phi in DN.
dis_all = [] # distance between g_star and each graph.
term3 = 0
for i1, a1 in enumerate(alpha):
for i2, a2 in enumerate(alpha):
term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
dis_all.append(dtemp)
# sort
sort_idx = np.argsort(dis_all)
dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of psi in DN
if dis_k[0] == 0: # the exact pre-image.
print('The exact pre-image is found from the input dataset.')
return 0, ghat_list, 0, 0, 0, 0, 0 # match the arity of the final return below.
dhat = dis_k[0] # the nearest distance
# for g in ghat_list:
# draw_Letter_graph(g)
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
# for gi in Gk:
# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
## nx.draw_networkx(gi)
# plt.show()
## draw_Letter_graph(g)
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
r = 0
itr_total = 0
dis_of_each_itr = [dhat]
nb_updated_iam = 0
nb_updated_k_iam = 0
nb_updated_random = 0
nb_updated_k_random = 0
# is_iam_duplicate = False
while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
print('Current preimage iteration =', r)
print('Total preimage iteration =', itr_total, '\n')
found_iam = False

Gn_nearest_median = [g.copy() for g in Gk]
if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
ghat_new_list = []
for g_tmp in Gk:
Gn_nearest_init = [g_tmp.copy()]
ghat_new_list_tmp, _, _ = iam_upgraded(Gn_nearest_median,
Gn_nearest_init, params_ged=params_ged, **params_iam)
ghat_new_list += ghat_new_list_tmp
else: # only the best graph in D_k is used to initialize IAM.
Gn_nearest_init = [g.copy() for g in Gk]
ghat_new_list, _, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
params_ged=params_ged, **params_iam)

# for g in g_tmp_list:
# nx.draw_networkx(g)
# plt.show()
# draw_Letter_graph(g)
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute distance between \psi and the new generated graphs.
knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
dhat_new_list = []
for idx, g_tmp in enumerate(ghat_new_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
len(ghat_new_list) + len(Gn_median) + 1),
alpha, knew, withterm3=False))
# find the new k nearest graphs.
for idx_g, ghat_new in enumerate(ghat_new_list):
dhat_new = dhat_new_list[idx_g]
# if the new distance is smaller than the max of D_k.
if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
# check if the new distance is the same as one in D_k.
is_duplicate = False
for dis_tmp in dis_k[1:-1]:
if np.abs(dhat_new - dis_tmp) < epsilon:
is_duplicate = True
print('IAM: duplicate k nearest graph generated.')
break
if not is_duplicate:
if np.abs(dhat_new - dhat) < epsilon:
print('IAM: I am equal!')
# dhat = dhat_new
# ghat_list = [ghat_new.copy()]
else:
print('IAM: we got better k nearest neighbors!')
nb_updated_k_iam += 1
print('the k nearest neighbors are updated',
nb_updated_k_iam, 'times.')
dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
sort_idx = np.argsort(dis_k)
dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
Gk = [Gk[idx] for idx in sort_idx[0:k]]
if dhat_new < dhat:
print('IAM: I have smaller distance!')
print(str(dhat) + '->' + str(dhat_new))
dhat = dhat_new
ghat_list = [Gk[0].copy()]
r = 0
nb_updated_iam += 1
print('the graph is updated by IAM', nb_updated_iam,
'times.')
nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'),
with_labels=True)
## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
plt.show()
found_iam = True
# when new distance is not smaller than the max of D_k, use random generation.
if not found_iam:
print('Distance not better, switching to random generation now.')
print(str(dhat) + '->' + str(dhat_new))
if InitRandomWithAllDk: # use all k nearest graphs as the initials.
init_list = [g_init.copy() for g_init in Gk]
else: # use just the nearest graph as the initial.
init_list = [Gk[0].copy()]
# number of edges to be changed.
if len(init_list) == 1:
# @todo what if the log is negative? how to choose alpha (scalar)? seems fdgs is always 1.
# fdgs = dhat_new
fdgs = nb_updated_random + 1
if fdgs < 1:
fdgs = 1
fdgs = int(np.ceil(np.log(fdgs)))
if fdgs < 1:
fdgs += 1
# fdgs = nb_updated_random + 1 # @todo:
fdgs_list = [fdgs]
else:
# @todo what if the log is negative? how to choose alpha (scalar)?
fdgs_list = np.array(dis_k[:])
if np.min(fdgs_list) < 1:
fdgs_list /= dis_k[0]
fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
if np.min(fdgs_list) < 1:
fdgs_list = np.array(fdgs_list) + 1
l = 0
found_random = False
while l < l_max and not found_random:
for idx_g, g_tmp in enumerate(init_list):
# add and delete edges.
ghat_new = nx.convert_node_labels_to_integers(g_tmp.copy())
# @todo: should we use just half of the adjacency matrix for undirected graphs?
nb_vpairs = nx.number_of_nodes(ghat_new) * (nx.number_of_nodes(ghat_new) - 1)
np.random.seed()
# which edges to change.
# @todo: what if fdgs is bigger than nb_vpairs?
idx_change = random.sample(range(nb_vpairs), fdgs_list[idx_g] if
fdgs_list[idx_g] < nb_vpairs else nb_vpairs)
# idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
# (nx.number_of_nodes(gs) - 1), fdgs)
for item in idx_change:
node1 = int(item / (nx.number_of_nodes(ghat_new) - 1))
node2 = (item - node1 * (nx.number_of_nodes(ghat_new) - 1))
if node2 >= node1: # skip the self pair.
node2 += 1
# @todo: is the randomness correct?
if not ghat_new.has_edge(node1, node2):
ghat_new.add_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(ghat_new)
# plt.show()
else:
ghat_new.remove_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(ghat_new)
# plt.show()
# nx.draw_networkx(ghat_new)
# plt.show()
# compute distance between \psi and the new generated graph.
knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1),
alpha, knew, withterm3=False)
# @todo: the new distance is smaller or also equal?
if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
# check if the new distance is the same as one in D_k.
is_duplicate = False
for dis_tmp in dis_k[1:-1]:
if np.abs(dhat_new - dis_tmp) < epsilon:
is_duplicate = True
print('Random: duplicate k nearest graph generated.')
break
if not is_duplicate:
if np.abs(dhat_new - dhat) < epsilon:
print('Random: I am equal!')
# dhat = dhat_new
# ghat_list = [ghat_new.copy()]
else:
print('Random: we got better k nearest neighbors!')
print('l =', str(l))
nb_updated_k_random += 1
print('the k nearest neighbors are updated by random generation',
nb_updated_k_random, 'times.')
dis_k = [dhat_new] + dis_k # add the new nearest distances.
Gk = [ghat_new.copy()] + Gk # add the corresponding graphs.
sort_idx = np.argsort(dis_k)
dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
Gk = [Gk[idx] for idx in sort_idx[0:k]]
if dhat_new < dhat:
print('\nRandom: I am smaller!')
print('l =', str(l))
print(dhat, '->', dhat_new)
dhat = dhat_new
ghat_list = [ghat_new.copy()]
r = 0
nb_updated_random += 1
print('the graph is updated by random generation',
nb_updated_random, 'times.')
nx.draw(ghat_new, labels=nx.get_node_attributes(ghat_new, 'atom'),
with_labels=True)
## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
plt.show()
found_random = True
break
l += 1
if not found_random: # l == l_max:
r += 1
dis_of_each_itr.append(dhat)
itr_total += 1
print('\nthe k shortest distances are', dis_k)
print('the shortest distances for previous iterations are', dis_of_each_itr)
print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
nb_updated_random, 'times.')
print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
'times, and by random generation', nb_updated_k_random, 'times.')
print('distances in kernel space:', dis_of_each_itr, '\n')
return dhat, ghat_list, dis_of_each_itr[-1], \
nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random
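# Sketch of the vertex-pair encoding used for the random edge flips above: an
# index in [0, n * (n - 1)) maps to an ordered pair of distinct nodes, which
# is why the self pair is skipped.
def _pair_from_index(item, n):
    node1 = int(item / (n - 1))
    node2 = item - node1 * (n - 1)
    if node2 >= node1:  # skip the self pair.
        node2 += 1
    return node1, node2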


###############################################################################
# Old implementations.
#def gk_iam(Gn, alpha):
# """This function constructs graph pre-image by the iterative pre-image
# framework in reference [1], algorithm 1, where the step of generating new
# graphs randomly is replaced by the IAM algorithm in reference [2].
#
# notes
# -----
# Every time a better graph is acquired, the older one is replaced by it.
# """
# pass
# # compute k nearest neighbors of phi in DN.
# dis_list = [] # distance between g_star and each graph.
# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
# dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
# k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
# (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
# k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
# dis_list.append(dtemp)
#
# # sort
# sort_idx = np.argsort(dis_list)
# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
# g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
# if dis_gs[0] == 0: # the exact pre-image.
# print('The exact pre-image is found from the input dataset.')
# return 0, g0hat
# dhat = dis_gs[0] # the nearest distance
# Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
# gihat_list = []
#
## i = 1
# r = 1
# while r < r_max:
# print('r =', r)
## found = False
# Gs_nearest = Gk + gihat_list
# g_tmp = iam(Gs_nearest)
#
# # compute distance between \psi and the new generated graph.
# knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
# p_quit=lmbda, n_iteration=20, remove_totters=False,
# n_jobs=multiprocessing.cpu_count(), verbose=False)
# dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
# knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
# (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
# k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
# if dnew <= dhat: # the new distance is smaller
# print('I am smaller!')
# dhat = dnew
# g_new = g_tmp.copy() # found better graph.
# gihat_list = [g_new]
# dis_gs.append(dhat)
# r = 0
# else:
# r += 1
#
# ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
#
# return dhat, ghat


#def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
# """This function constructs graph pre-image by the iterative pre-image
# framework in reference [1], algorithm 1, where the step of generating new
# graphs randomly is replaced by the IAM algorithm in reference [2].
#
# notes
# -----
# Every time a better graph is acquired, its distance in kernel space is
# compared with the k nearest ones, and the k nearest distances from the k+1
# distances will be used as the new ones.
# """
# # compute k nearest neighbors of phi in DN.
# dis_list = [] # distance between g_star and each graph.
# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
# dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
# dis_list.append(dtemp)
#
# # sort
# sort_idx = np.argsort(dis_list)
# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
# g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
# if dis_gs[0] == 0: # the exact pre-image.
# print('The exact pre-image is found from the input dataset.')
# return 0, g0hat
# dhat = dis_gs[0] # the nearest distance
# ghat = g0hat.copy()
# Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
# for gi in Gk:
# nx.draw_networkx(gi)
# plt.show()
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
# Gs_nearest = Gk.copy()
## gihat_list = []
#
## i = 1
# r = 1
# while r < r_max:
# print('r =', r)
## found = False
## Gs_nearest = Gk + gihat_list
## g_tmp = iam(Gs_nearest)
# g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
# nx.draw_networkx(g_tmp)
# plt.show()
# print(g_tmp.nodes(data=True))
# print(g_tmp.edges(data=True))
#
# # compute distance between \psi and the new generated graph.
# gi_list = [Gn[i] for i in idx_gi]
# knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
# dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
#
## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
# if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
# print('I am smaller!')
# print(str(dhat) + '->' + str(dnew))
## nx.draw_networkx(ghat)
## plt.show()
## print('->')
## nx.draw_networkx(g_tmp)
## plt.show()
#
# dhat = dnew
# g_new = g_tmp.copy() # found better graph.
# ghat = g_tmp.copy()
# dis_gs.append(dhat) # add the new nearest distance.
# Gs_nearest.append(g_new) # add the corresponding graph.
# sort_idx = np.argsort(dis_gs)
# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
# r = 0
# else:
# r += 1
#
# return dhat, ghat


#def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
# """This function constructs graph pre-image by the iterative pre-image
# framework in reference [1], algorithm 1, where the step of generating new
# graphs randomly is replaced by the IAM algorithm in reference [2].
#
# notes
# -----
# Every time a set of n better graphs is acquired, their distances in kernel space are
# compared with the k nearest ones, and the k nearest distances from the k+n
# distances will be used as the new ones.
# """
# Gn_median = [Gn[idx].copy() for idx in idx_gi]
# # compute k nearest neighbors of phi in DN.
# dis_list = [] # distance between g_star and each graph.
# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
# dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
# dis_list.append(dtemp)
#
# # sort
# sort_idx = np.argsort(dis_list)
# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
# g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
# if dis_gs[0] == 0: # the exact pre-image.
# print('The exact pre-image is found from the input dataset.')
# return 0, g0hat_list
# dhat = dis_gs[0] # the nearest distance
# ghat_list = [g.copy() for g in g0hat_list]
# for g in ghat_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
# for gi in Gk:
# nx.draw_networkx(gi)
# plt.show()
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
# Gs_nearest = Gk.copy()
## gihat_list = []
#
## i = 1
# r = 1
# while r < r_max:
# print('r =', r)
## found = False
## Gs_nearest = Gk + gihat_list
## g_tmp = iam(Gs_nearest)
# g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
# Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
# for g in g_tmp_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
#
# # compute distance between \psi and the new generated graphs.
# gi_list = [Gn[i] for i in idx_gi]
# knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
# dnew_list = []
# for idx, g_tmp in enumerate(g_tmp_list):
# dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
# len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
#
## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
#
# # find the new k nearest graphs.
# dis_gs = dnew_list + dis_gs # add the new nearest distances.
# Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
# sort_idx = np.argsort(dis_gs)
# if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
# print('We got better k nearest neighbors! Hurray!')
# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
# print(dis_gs[-1])
# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
# if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
# print('I have smaller or equal distance!')
#                    print(str(dhat) + ' -> ' + str(dis_gs[0]))
#                    dhat = dis_gs[0]
# idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
# ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
# for g in ghat_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# r = 0
# else:
# r += 1
#
# return dhat, ghat_list
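The notes above describe the candidate-update rule: pool the n newly generated
graphs' distances with the current k nearest and keep the k smallest. A minimal
self-contained sketch of that step (illustrative only, not part of the original
module; the helper name is hypothetical):

import numpy as np

def update_k_nearest(dis_gs, Gs_nearest, dnew_list, g_new_list, k):
    # Pool new candidate distances/graphs with the current k nearest,
    # then keep the k smallest distances and their corresponding graphs.
    dis_all = list(dnew_list) + list(dis_gs)
    graphs_all = list(g_new_list) + list(Gs_nearest)
    keep = np.argsort(dis_all)[0:k]
    return [dis_all[i] for i in keep], [graphs_all[i] for i in keep]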

+ 0
- 309
gklearn/preimage/preimage_random.py View File

@@ -1,309 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 6 16:03:11 2019

pre-image
@author: ljia
"""

import sys
import numpy as np
import random
from tqdm import tqdm
import networkx as nx
import matplotlib.pyplot as plt

from gklearn.preimage.utils import compute_kernel, dis_gstar


def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
# compute k nearest neighbors of phi in DN.
dis_list = [] # distance between g_star and each graph.
term3 = 0
for i1, a1 in enumerate(alpha):
for i2, a2 in enumerate(alpha):
term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
dis_list.append(dtemp)
# print(np.max(dis_list))
# print(np.min(dis_list))
# print(np.min([item for item in dis_list if item != 0]))
# print(np.mean(dis_list))
# sort
sort_idx = np.argsort(dis_list)
dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
if dis_gs[0] == 0: # the exact pre-image.
print('The exact pre-image is found from the input dataset.')
return 0, g0hat_list[0], 0
dhat = dis_gs[0] # the nearest distance
# ghat_list = [g.copy() for g in g0hat_list]
# for g in ghat_list:
# draw_Letter_graph(g)
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
# for gi in Gk:
## nx.draw_networkx(gi)
## plt.show()
# draw_Letter_graph(g)
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
Gs_nearest = [g.copy() for g in Gk]
gihat_list = []
dihat_list = []
# i = 1
r = 0
# sod_list = [dhat]
# found = False
dis_of_each_itr = [dhat]
nb_updated = 0
g_best = []
while r < r_max:
print('\nr =', r)
print('itr for gk =', nb_updated, '\n')
found = False
dis_bests = dis_gs + dihat_list
# @todo what if the log is negative? how to choose alpha (scalar)?
fdgs_list = np.array(dis_bests)
if np.min(fdgs_list) < 1:
fdgs_list /= np.min(dis_bests)
fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
if np.min(fdgs_list) < 1:
fdgs_list = np.array(fdgs_list) + 1
for ig, gs in enumerate(Gs_nearest + gihat_list):
# nx.draw_networkx(gs)
# plt.show()
for trail in range(0, l):
# for trail in tqdm(range(0, l), desc='l loops', file=sys.stdout):
# add and delete edges.
gtemp = gs.copy()
np.random.seed()
# which edges to change.
# @todo: should we use just half of the adjacency matrix for undirected graphs?
nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
# @todo: what if fdgs is bigger than nb_vpairs?
idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if
fdgs_list[ig] < nb_vpairs else nb_vpairs)
# idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
# (nx.number_of_nodes(gs) - 1), fdgs)
for item in idx_change:
node1 = int(item / (nx.number_of_nodes(gs) - 1))
node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
if node2 >= node1: # skip the self pair.
node2 += 1
# @todo: is the randomness correct?
if not gtemp.has_edge(node1, node2):
gtemp.add_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
else:
gtemp.remove_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
# compute distance between \psi and the new generated graph.
# knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
# p_quit=lmbda, n_iteration=20, remove_totters=False,
# n_jobs=multiprocessing.cpu_count(), verbose=False)
knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew,
withterm3=False)
if dnew <= dhat: # @todo: the new distance is smaller or also equal?
if dnew < dhat:
print('\nI am smaller!')
print('ig =', str(ig), ', l =', str(trail))
print(dhat, '->', dnew)
nb_updated += 1
elif dnew == dhat:
print('I am equal!')
# nx.draw_networkx(gtemp)
# plt.show()
# print(gtemp.nodes(data=True))
# print(gtemp.edges(data=True))
dhat = dnew
gnew = gtemp.copy()
found = True # found better graph.
if found:
r = 0
gihat_list = [gnew]
dihat_list = [dhat]
else:
r += 1
dis_of_each_itr.append(dhat)
print('the shortest distances for previous iterations are', dis_of_each_itr)
# dis_best.append(dhat)
g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
print('distances in kernel space:', dis_of_each_itr, '\n')
return dhat, g_best, nb_updated
# return 0, 0, 0
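# Note (added for illustration; not in the original file): the edge-flip step
# above samples flat indices over ordered vertex pairs without self-loops and
# decodes each index into (node1, node2). A minimal self-contained check of
# that decoding:
def decode_vertex_pair(item, n):
    # A flat index in [0, n * (n - 1)) maps to an ordered pair, node1 != node2.
    node1 = item // (n - 1)
    node2 = item - node1 * (n - 1)
    if node2 >= node1:  # skip the self pair.
        node2 += 1
    return node1, node2

# For n = 3 the six indices enumerate all ordered pairs without self-loops:
# (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)
# print([decode_vertex_pair(i, 3) for i in range(6)])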


if __name__ == '__main__':
from gklearn.utils.graphfiles import loadDataset
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
# 'extra_params': {}}
# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'extra_params': {}} # node symb
DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
#DN = DN[0:10]
lmbda = 0.03 # termination probability
r_max = 3 # 10 # iteration limit.
l = 500
alpha_range = np.linspace(0.5, 0.5, 1)
#alpha_range = np.linspace(0.1, 0.9, 9)
k = 10 # 5 # k nearest neighbors
# randomly select two molecules
#np.random.seed(1)
#idx1, idx2 = np.random.randint(0, len(DN), 2)
#g1 = DN[idx1]
#g2 = DN[idx2]
idx1 = 0
idx2 = 6
g1 = DN[idx1]
g2 = DN[idx2]
# compute
k_list = [] # kernel between each graph and itself.
k_g1_list = [] # kernel between each graph and g1
k_g2_list = [] # kernel between each graph and g2
for ig, g in tqdm(enumerate(DN), desc='computing self kernels', file=sys.stdout):
# ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None,
# p_quit=lmbda, n_iteration=20, remove_totters=False,
# n_jobs=multiprocessing.cpu_count(), verbose=False)
ktemp = compute_kernel([g, g1, g2], 'untilhpathkernel', verbose=False)
k_list.append(ktemp[0, 0])
k_g1_list.append(ktemp[0, 1])
k_g2_list.append(ktemp[0, 2])
g_best = []
dis_best = []
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
# compute k nearest neighbors of phi in DN.
dis_list = [] # distance between g_star and each graph.
for ig, g in tqdm(enumerate(DN), desc='computing distances', file=sys.stdout):
dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
(1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
dis_list.append(np.sqrt(dtemp))
# sort
sort_idx = np.argsort(dis_list)
dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
g0hat = DN[sort_idx[0]] # the nearest neighbor of phi in DN
if dis_gs[0] == 0: # the exact pre-image.
print('The exact pre-image is found from the input dataset.')
g_pimg = g0hat
break
dhat = dis_gs[0] # the nearest distance
Dk = [DN[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
gihat_list = []
i = 1
r = 1
while r < r_max:
print('r =', r)
found = False
for ig, gs in enumerate(Dk + gihat_list):
# nx.draw_networkx(gs)
# plt.show()
# @todo what if the log is negative?
fdgs = int(np.abs(np.ceil(np.log(alpha * dis_gs[ig]))))
for trail in tqdm(range(0, l), desc='l loop', file=sys.stdout):
# add and delete edges.
gtemp = gs.copy()
np.random.seed()
# which edges to change.
# @todo: should we use just half of the adjacency matrix for undirected graphs?
nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
# @todo: what if fdgs is bigger than nb_vpairs?
idx_change = random.sample(range(nb_vpairs), fdgs if fdgs < nb_vpairs else nb_vpairs)
# idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
# (nx.number_of_nodes(gs) - 1), fdgs)
for item in idx_change:
node1 = int(item / (nx.number_of_nodes(gs) - 1))
node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
if node2 >= node1: # skip the self pair.
node2 += 1
# @todo: is the randomness correct?
if not gtemp.has_edge(node1, node2):
# @todo: how to update the bond_type? 0 or 1?
gtemp.add_edges_from([(node1, node2, {'bond_type': 1})])
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
else:
gtemp.remove_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
# compute distance between phi and the new generated graph.
# knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
# p_quit=lmbda, n_iteration=20, remove_totters=False,
# n_jobs=multiprocessing.cpu_count(), verbose=False)
knew = compute_kernel([gtemp, g1, g2], 'untilhpathkernel', verbose=False)
dnew = np.sqrt(knew[0, 0] - 2 * (alpha * knew[0, 1] + (1 - alpha) *
knew[0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
(1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]))
if dnew < dhat: # @todo: the new distance is smaller or also equal?
print('I am smaller!')
print(dhat, '->', dnew)
nx.draw_networkx(gtemp)
plt.show()
print(gtemp.nodes(data=True))
print(gtemp.edges(data=True))
dhat = dnew
gnew = gtemp.copy()
found = True # found better graph.
r = 0
elif dnew == dhat:
print('I am equal!')
if found:
gihat_list = [gnew]
dis_gs.append(dhat)
else:
r += 1
dis_best.append(dhat)
g_best += ([g0hat] if len(gihat_list) == 0 else gihat_list)
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-image is')
nx.draw_networkx(g_best[idx])
plt.show()
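The inline expressions above (both the dis_list loop and the dnew update)
instantiate, for two anchor graphs g1 and g2 with weights alpha and 1 - alpha,
the standard kernel-space distance to the weighted mean
\psi = \sum_i \alpha_i \phi(g_i); this restates what dis_gstar computes rather
than adding anything new:

    d(g, \psi) = \sqrt{k(g, g) - 2 \sum_i \alpha_i\, k(g, g_i)
                       + \sum_{i,j} \alpha_i \alpha_j\, k(g_i, g_j)}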

+ 0
- 122
gklearn/preimage/python_code.py View File

@@ -1,122 +0,0 @@
# This fragment is GEDLIB's C++ option parsing mid-translation to Python: the
# original still contained C++ calls (std::stoul, std::stod, std::stoi,
# std::string) and stray closing braces. It is rewritten below as idiomatic
# Python; `opt_name`, `opt_val`, the trailing-underscore member variables and
# the `Error` exception are assumed to be defined by the surrounding module.
elif opt_name == 'random-inits':
    try:
        num_random_inits_ = int(opt_val)
        desired_num_random_inits_ = num_random_inits_
    except ValueError:
        raise Error('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
    if num_random_inits_ <= 0:
        raise Error('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
elif opt_name == 'randomness':
    if opt_val == 'PSEUDO':
        use_real_randomness_ = False
    elif opt_val == 'REAL':
        use_real_randomness_ = True
    else:
        raise Error('Invalid argument "' + opt_val + '" for option randomness. Usage: options = "[--randomness REAL|PSEUDO] [...]"')
elif opt_name == 'stdout':
    if opt_val == '0':
        print_to_stdout_ = 0
    elif opt_val == '1':
        print_to_stdout_ = 1
    elif opt_val == '2':
        print_to_stdout_ = 2
    else:
        raise Error('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
elif opt_name == 'refine':
    if opt_val == 'TRUE':
        refine_ = True
    elif opt_val == 'FALSE':
        refine_ = False
    else:
        raise Error('Invalid argument "' + opt_val + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"')
elif opt_name == 'time-limit':
    try:
        time_limit_in_sec_ = float(opt_val)
    except ValueError:
        raise Error('Invalid argument "' + opt_val + '" for option time-limit. Usage: options = "[--time-limit <convertible to double>] [...]"')
elif opt_name == 'max-itrs':
    try:
        max_itrs_ = int(opt_val)
    except ValueError:
        raise Error('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs <convertible to int>] [...]"')
elif opt_name == 'max-itrs-without-update':
    try:
        max_itrs_without_update_ = int(opt_val)
    except ValueError:
        raise Error('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update <convertible to int>] [...]"')
elif opt_name == 'seed':
    try:
        seed_ = int(opt_val)
    except ValueError:
        raise Error('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed <convertible to int greater equal 0>] [...]"')
elif opt_name == 'epsilon':
    try:
        epsilon_ = float(opt_val)
    except ValueError:
        raise Error('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]"')
    if epsilon_ <= 0:
        raise Error('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]"')
elif opt_name == 'inits-increase-order':
    try:
        num_inits_increase_order_ = int(opt_val)
    except ValueError:
        raise Error('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
    if num_inits_increase_order_ <= 0:
        raise Error('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
elif opt_name == 'init-type-increase-order':
    init_type_increase_order_ = opt_val
    if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++':
        raise Exception('Invalid argument ' + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"')
elif opt_name == 'max-itrs-increase-order':
    try:
        max_itrs_increase_order_ = int(opt_val)
    except ValueError:
        raise Error('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order <convertible to int>] [...]"')
else:
    valid_options = '[--init-type <arg>] [--random-inits <arg>] [--randomness <arg>] [--seed <arg>] [--stdout <arg>] '
    valid_options += '[--time-limit <arg>] [--max-itrs <arg>] [--epsilon <arg>] '
    valid_options += '[--inits-increase-order <arg>] [--init-type-increase-order <arg>] [--max-itrs-increase-order <arg>]'
    raise Error('Invalid option "' + opt_name + '". Usage: options = "' + valid_options + '"')
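A fragment like the elif chain above would be driven by splitting a
GEDLIB-style options string into (opt_name, opt_val) pairs. A minimal,
hypothetical driver (not part of the original file):

def parse_options(options):
    # Split e.g. '--max-itrs 100 --seed 1' into (name, value) pairs.
    tokens = options.split()
    return [(tokens[i].lstrip('-'), tokens[i + 1])
            for i in range(0, len(tokens), 2)]

for opt_name, opt_val in parse_options('--max-itrs 100 --seed 1'):
    print(opt_name, opt_val)  # each pair would feed the elif chain above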


+ 0
- 83
gklearn/preimage/test.py View File

@@ -1,83 +0,0 @@
#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/gedlibpy/lib/fann/:/export/home/lambertn/Documents/gedlibpy/lib/libsvm.3.22:/export/home/lambertn/Documents/gedlibpy/lib/nomad

#So that "import script" finds the libraries GedLib needs
#Equivalent to setting the LD_LIBRARY_PATH environment variable in a bash shell
import gedlibpy.librariesImport
from gedlibpy import gedlibpy
import networkx as nx


def init() :
print("List of Edit Cost Options : ")
for i in gedlibpy.list_of_edit_cost_options :
print (i)
print("")

print("List of Method Options : ")
for j in gedlibpy.list_of_method_options :
print (j)
print("")

print("List of Init Options : ")
for k in gedlibpy.list_of_init_options :
print (k)
print("")


def test():
gedlibpy.load_GXL_graphs('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml')
listID = gedlibpy.get_all_graph_ids()
gedlibpy.set_edit_cost("CHEM_1")
gedlibpy.init()
gedlibpy.set_method("IPFP", "")
gedlibpy.init_method()
g = listID[0]
h = listID[1]
gedlibpy.run_method(g, h)
print("Node Map : ", gedlibpy.get_node_map(g,h))
print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
print("Assignment Matrix : ")
print(gedlibpy.get_assignment_matrix(g, h))
print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g,h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))


def convertGraph(G):
G_new = nx.Graph()
for nd, attrs in G.nodes(data=True):
G_new.add_node(str(nd), chem=attrs['atom'])
for nd1, nd2, attrs in G.edges(data=True):
G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
return G_new


def testNxGrapĥ():
from gklearn.utils.graphfiles import loadDataset
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
gedlibpy.restart_env()
for graph in Gn:
g_new = convertGraph(graph)
gedlibpy.add_nx_graph(g_new, "")
listID = gedlibpy.get_all_graph_ids()
gedlibpy.set_edit_cost("CHEM_1")
gedlibpy.init()
gedlibpy.set_method("IPFP", "")
gedlibpy.init_method()

print(listID)
g = listID[0]
h = listID[1]

gedlibpy.run_method(g, h)

print("Node Map : ", gedlibpy.get_node_map(g, h))
print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))

#test()
init()
#testNxGrapĥ()

+ 0
- 648
gklearn/preimage/test_fitDistance.py View File

@@ -1,648 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 24 11:50:56 2019

@author: ljia
"""
import os
import sys

from matplotlib import pyplot as plt
import numpy as np
from tqdm import tqdm

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.utils import remove_edges
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
from gklearn.preimage.utils import normalize_distance_matrix


def test_update_costs():
from preimage.fitDistance import update_costs
import cvxpy as cp
ds = np.load('results/xp_fit_method/fit_data_debug4.gm.npz')
nb_cost_mat = ds['nb_cost_mat']
dis_k_vec = ds['dis_k_vec']
n_edit_operations = ds['n_edit_operations']
ged_vec_init = ds['ged_vec_init']
ged_mat = ds['ged_mat']
nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
# constraints = [x >= [0.000 for i in range(nb_cost_mat_new.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
# constraints = [x >= [0.000 for i in range(nb_cost_mat_new.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
# np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
constraints = [x >= [0.00 for i in range(nb_cost_mat_new.shape[1])],
np.array([0.0, 1.0, -1.0]).T@x == 0.0]
# constraints = [x >= [0.00000 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
print(x.value)
edit_costs_new = np.concatenate((x.value, np.array([0.0])))
residual = np.sqrt(prob.value)
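# Note (added for readability; not in the original file): in matrix form, the
# fit above and its commented variants solve a nonnegative least-squares
# problem over the edit-cost vector c,
#     \min_{c \geq 0} \|N c - d\|_2^2  s.t.  A c \geq 0,  B c = 0,
# where N is the matrix of edit-operation counts (nb_cost_mat), d the vector
# of kernel distances (dis_k_vec), and the rows of A and B are constraint
# vectors such as (1, 1, -1, 0, 0) for c_vs <= c_vi + c_vr and (0, 1, -1)
# for c_ei = c_er.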


def median_paper_clcpc_python_best():
"""c_vs <= c_vi + c_vr, c_es <= c_ei + c_er with ged computation with
python invoking the c++ code by bash command (with updated library).
"""
# ds = {'name': 'monoterpenoides',
# 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
# _, y_all = loadDataset(ds['dataset'])
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'
itr_max = 6
algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
'algo_options': algo_options, 'stabilizer': None}
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt'

for y in y_all:
for repeat in range(repeats):
edit_costs_output_file = open(fn_edit_costs_output, 'a')
collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
nb_cost_mat_list = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
gkernel, itr_max, params_ged=params_ged,
parallel=True)
total_time = np.sum(time_list)
# print('\nedit_costs:', edit_costs)
# print('\nresidual_list:', residual_list)
# print('\nedit_cost_list:', edit_cost_list)
# print('\ndistance matrix in kernel space:', dis_k_mat)
# print('\nged matrix:', ged_mat)
# print('\ntotal time:', total_time)
# print('\nnb_cost_mat:', nb_cost_mat_list[-1])
np.savez('results/median_paper/fit_distance.clcpc.python_init40.monot.elabeled.uhpkernel.y'
+ y + '.repeat' + str(repeat) + '.k10..gm',
edit_costs=edit_costs,
residual_list=residual_list, edit_cost_list=edit_cost_list,
dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)
for ec in edit_costs:
edit_costs_output_file.write(str(ec) + ' ')
edit_costs_output_file.write('\n')
edit_costs_output_file.close()
# # normalized distance matrices.
# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
# edit_costs = gmfile['edit_costs']
# residual_list = gmfile['residual_list']
# edit_cost_list = gmfile['edit_cost_list']
# dis_k_mat = gmfile['dis_k_mat']
# ged_mat = gmfile['ged_mat']
# total_time = gmfile['total_time']
# nb_cost_mat_list = gmfile['nb_cost_mat_list']
nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
print(nb_consistent, nb_inconsistent, ratio_consistent)
# norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
# plt.imshow(norm_dis_k_mat)
# plt.colorbar()
# plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
# plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.png', format='png')
# # plt.show()
# plt.clf()
#
# norm_ged_mat = normalize_distance_matrix(ged_mat)
# plt.imshow(norm_ged_mat)
# plt.colorbar()
# plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
# plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.png', format='png')
# # plt.show()
# plt.clf()
#
# norm_diff = norm_ged_mat - norm_dis_k_mat
# plt.imshow(norm_diff)
# plt.colorbar()
# plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_best.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
# plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_best.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.png', format='png')
# # plt.show()
# plt.clf()
# # draw_count_bar(norm_diff)


def median_paper_clcpc_python_bash_cpp():
"""c_vs <= c_vi + c_vr, c_es <= c_ei + c_er with ged computation with
python invoking the c++ code by bash command (with updated library).
"""
# ds = {'name': 'monoterpenoides',
# 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
# _, y_all = loadDataset(ds['dataset'])
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'
itr_max = 20
algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
params_ged = {'lib': 'gedlib-bash', 'cost': 'CONSTANT', 'method': 'IPFP',
'algo_options': algo_options}
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt'

for y in y_all:
for repeat in range(repeats):
edit_costs_output_file = open(fn_edit_costs_output, 'a')
collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
gkernel, itr_max, params_ged=params_ged,
parallel=False)
total_time = np.sum(time_list)
# print('\nedit_costs:', edit_costs)
# print('\nresidual_list:', residual_list)
# print('\nedit_cost_list:', edit_cost_list)
# print('\ndistance matrix in kernel space:', dis_k_mat)
# print('\nged matrix:', ged_mat)
# print('\ntotal time:', total_time)
# print('\nnb_cost_mat:', nb_cost_mat_list[-1])
np.savez('results/median_paper/fit_distance.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
+ y + '.repeat' + str(repeat) + '.gm',
edit_costs=edit_costs,
residual_list=residual_list, edit_cost_list=edit_cost_list,
dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
coef_dk=coef_dk)
for ec in edit_costs:
edit_costs_output_file.write(str(ec) + ' ')
edit_costs_output_file.write('\n')
edit_costs_output_file.close()
# # normalized distance matrices.
# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
# edit_costs = gmfile['edit_costs']
# residual_list = gmfile['residual_list']
# edit_cost_list = gmfile['edit_cost_list']
# dis_k_mat = gmfile['dis_k_mat']
# ged_mat = gmfile['ged_mat']
# total_time = gmfile['total_time']
# nb_cost_mat_list = gmfile['nb_cost_mat_list']
# coef_dk = gmfile['coef_dk']
nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
print(nb_consistent, nb_inconsistent, ratio_consistent)
# norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
# plt.imshow(norm_dis_k_mat)
# plt.colorbar()
# plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
# plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.png', format='png')
# # plt.show()
# plt.clf()
#
# norm_ged_mat = normalize_distance_matrix(ged_mat)
# plt.imshow(norm_ged_mat)
# plt.colorbar()
# plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
# plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.png', format='png')
# # plt.show()
# plt.clf()
#
# norm_diff = norm_ged_mat - norm_dis_k_mat
# plt.imshow(norm_diff)
# plt.colorbar()
# plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
# plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
# + y + '.repeat' + str(repeat) + '.png', format='png')
# # plt.show()
# plt.clf()
# # draw_count_bar(norm_diff)





def test_cs_leq_ci_plus_cr_python_bash_cpp():
"""c_vs <= c_vi + c_vr, c_es <= c_ei + c_er with ged computation with
python invoking the c++ code by bash command (with updated library).
"""
ds = {'name': 'monoterpenoides',
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:10]
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'
itr_max = 10
algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
params_ged = {'lib': 'gedlib-bash', 'cost': 'CONSTANT', 'method': 'IPFP',
'algo_options': algo_options}
edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
gkernel, itr_max, params_ged=params_ged,
parallel=False)
total_time = np.sum(time_list)
print('\nedit_costs:', edit_costs)
print('\nresidual_list:', residual_list)
print('\nedit_cost_list:', edit_cost_list)
print('\ndistance matrix in kernel space:', dis_k_mat)
print('\nged matrix:', ged_mat)
print('\ntotal time:', total_time)
print('\nnb_cost_mat:', nb_cost_mat_list[-1])
np.savez('results/fit_distance.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel.gm',
edit_costs=edit_costs,
residual_list=residual_list, edit_cost_list=edit_cost_list,
dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
coef_dk=coef_dk)
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
## Gn = Gn[0:10]
## remove_edges(Gn)
# gkernel = 'untilhpathkernel'
# node_label = 'atom'
# edge_label = 'bond_type'
# itr_max = 10
# edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
# nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
# gkernel, itr_max)
# total_time = np.sum(time_list)
# print('\nedit_costs:', edit_costs)
# print('\nresidual_list:', residual_list)
# print('\nedit_cost_list:', edit_cost_list)
# print('\ndistance matrix in kernel space:', dis_k_mat)
# print('\nged matrix:', ged_mat)
# print('\ntotal time:', total_time)
# print('\nnb_cost_mat:', nb_cost_mat_list[-1])
# np.savez('results/fit_distance.cs_leq_ci_plus_cr.mutag.elabeled.uhpkernel.gm',
# edit_costs=edit_costs,
# residual_list=residual_list, edit_cost_list=edit_cost_list,
# dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
# total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, coef_dk)
# # normalized distance matrices.
# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.monot.elabeled.uhpkernel.gm.npz')
# edit_costs = gmfile['edit_costs']
# residual_list = gmfile['residual_list']
# edit_cost_list = gmfile['edit_cost_list']
# dis_k_mat = gmfile['dis_k_mat']
# ged_mat = gmfile['ged_mat']
# total_time = gmfile['total_time']
# nb_cost_mat_list = gmfile['nb_cost_mat_list']
# coef_dk = gmfile['coef_dk']
nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
print(nb_consistent, nb_inconsistent, ratio_consistent)
# dis_k_sub = pairwise_substitution(dis_k_mat)
# ged_sub = pairwise_substitution(ged_mat)
# np.savez('results/sub_dis_mat.cs_leq_ci_plus_cr.gm',
# dis_k_sub=dis_k_sub, ged_sub=ged_sub)
norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
plt.imshow(norm_dis_k_mat)
plt.colorbar()
plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
+ '.eps', format='eps', dpi=300)
plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
+ '.png', format='png')
# plt.show()
plt.clf()
norm_ged_mat = normalize_distance_matrix(ged_mat)
plt.imshow(norm_ged_mat)
plt.colorbar()
plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
+ '.eps', format='eps', dpi=300)
plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
+ '.png', format='png')
# plt.show()
plt.clf()
norm_diff = norm_ged_mat - norm_dis_k_mat
plt.imshow(norm_diff)
plt.colorbar()
plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
+ '.eps', format='eps', dpi=300)
plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel'
+ '.png', format='png')
# plt.show()
plt.clf()
# draw_count_bar(norm_diff)


def test_anycosts():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:10]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
itr_max = 10
edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
total_time = np.sum(time_list)
print('\nedit_costs:', edit_costs)
print('\nresidual_list:', residual_list)
print('\nedit_cost_list:', edit_cost_list)
print('\ndistance matrix in kernel space:', dis_k_mat)
print('\nged matrix:', ged_mat)
print('\ntotal time:', total_time)
print('\nnb_cost_mat:', nb_cost_mat_list[-1])
np.savez('results/fit_distance.any_costs.gm', edit_costs=edit_costs,
residual_list=residual_list, edit_cost_list=edit_cost_list,
dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)
# # normalized distance matrices.
# gmfile = np.load('results/fit_distance.any_costs.gm.npz')
# edit_costs = gmfile['edit_costs']
# residual_list = gmfile['residual_list']
# edit_cost_list = gmfile['edit_cost_list']
# dis_k_mat = gmfile['dis_k_mat']
# ged_mat = gmfile['ged_mat']
# total_time = gmfile['total_time']
## nb_cost_mat_list = gmfile['nb_cost_mat_list']
norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
plt.imshow(norm_dis_k_mat)
plt.colorbar()
plt.savefig('results/norm_dis_k_mat.any_costs' + '.eps', format='eps', dpi=300)
# plt.savefig('results/norm_dis_k_mat.any_costs' + '.png', format='png')
# plt.show()
plt.clf()
norm_ged_mat = normalize_distance_matrix(ged_mat)
plt.imshow(norm_ged_mat)
plt.colorbar()
plt.savefig('results/norm_ged_mat.any_costs' + '.eps', format='eps', dpi=300)
# plt.savefig('results/norm_ged_mat.any_costs' + '.png', format='png')
# plt.show()
plt.clf()
norm_diff = norm_ged_mat - norm_dis_k_mat
plt.imshow(norm_diff)
plt.colorbar()
plt.savefig('results/diff_mat_norm_ged_dis_k.any_costs' + '.eps', format='eps', dpi=300)
# plt.savefig('results/diff_mat_norm_ged_dis_k.any_costs' + '.png', format='png')
# plt.show()
plt.clf()
# draw_count_bar(norm_diff)

def test_cs_leq_ci_plus_cr():
"""c_vs <= c_vi + c_vr, c_es <= c_ei + c_er
"""
ds = {'name': 'monoterpenoides',
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:10]
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'
itr_max = 10
edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
gkernel, itr_max,
fitkernel='gaussian')
total_time = np.sum(time_list)
print('\nedit_costs:', edit_costs)
print('\nresidual_list:', residual_list)
print('\nedit_cost_list:', edit_cost_list)
print('\ndistance matrix in kernel space:', dis_k_mat)
print('\nged matrix:', ged_mat)
print('\ntotal time:', total_time)
print('\nnb_cost_mat:', nb_cost_mat_list[-1])
np.savez('results/fit_distance.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel.gm',
edit_costs=edit_costs,
residual_list=residual_list, edit_cost_list=edit_cost_list,
dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
coef_dk=coef_dk)
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
## Gn = Gn[0:10]
## remove_edges(Gn)
# gkernel = 'untilhpathkernel'
# node_label = 'atom'
# edge_label = 'bond_type'
# itr_max = 10
# edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
# nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
# gkernel, itr_max)
# total_time = np.sum(time_list)
# print('\nedit_costs:', edit_costs)
# print('\nresidual_list:', residual_list)
# print('\nedit_cost_list:', edit_cost_list)
# print('\ndistance matrix in kernel space:', dis_k_mat)
# print('\nged matrix:', ged_mat)
# print('\ntotal time:', total_time)
# print('\nnb_cost_mat:', nb_cost_mat_list[-1])
# np.savez('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.mutag.elabeled.uhpkernel.gm',
# edit_costs=edit_costs,
# residual_list=residual_list, edit_cost_list=edit_cost_list,
# dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
# total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, coef_dk)
# # normalized distance matrices.
# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
# edit_costs = gmfile['edit_costs']
# residual_list = gmfile['residual_list']
# edit_cost_list = gmfile['edit_cost_list']
# dis_k_mat = gmfile['dis_k_mat']
# ged_mat = gmfile['ged_mat']
# total_time = gmfile['total_time']
# nb_cost_mat_list = gmfile['nb_cost_mat_list']
# coef_dk = gmfile['coef_dk']
nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
print(nb_consistent, nb_inconsistent, ratio_consistent)
# dis_k_sub = pairwise_substitution(dis_k_mat)
# ged_sub = pairwise_substitution(ged_mat)
# np.savez('results/sub_dis_mat.cs_leq_ci_plus_cr.cost_leq_1en2.gm',
# dis_k_sub=dis_k_sub, ged_sub=ged_sub)
norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
plt.imshow(norm_dis_k_mat)
plt.colorbar()
plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
+ '.eps', format='eps', dpi=300)
plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
+ '.png', format='png')
# plt.show()
plt.clf()
norm_ged_mat = normalize_distance_matrix(ged_mat)
plt.imshow(norm_ged_mat)
plt.colorbar()
plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
+ '.eps', format='eps', dpi=300)
plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
+ '.png', format='png')
# plt.show()
plt.clf()
norm_diff = norm_ged_mat - norm_dis_k_mat
plt.imshow(norm_diff)
plt.colorbar()
plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
+ '.eps', format='eps', dpi=300)
plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel'
+ '.png', format='png')
# plt.show()
plt.clf()
# draw_count_bar(norm_diff)


def test_unfitted():
"""unfitted.
"""
from fitDistance import compute_geds
from utils import kernel_distance_matrix
ds = {'name': 'monoterpenoides',
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:10]
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'

# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
## Gn = Gn[0:10]
## remove_edges(Gn)
# gkernel = 'marginalizedkernel'

dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, gkernel=gkernel)
ged_all, ged_mat, n_edit_operations = compute_geds(Gn, [3, 3, 1, 3, 3, 1],
[0, 1, 2, 3, 4, 5], parallel=True)
print('\ndistance matrix in kernel space:', dis_k_mat)
print('\nged matrix:', ged_mat)
# np.savez('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.gm', edit_costs=edit_costs,
# residual_list=residual_list, edit_cost_list=edit_cost_list,
# dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
# total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)
# normalized distance matrices.
# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en3.gm.npz')
# edit_costs = gmfile['edit_costs']
# residual_list = gmfile['residual_list']
# edit_cost_list = gmfile['edit_cost_list']
# dis_k_mat = gmfile['dis_k_mat']
# ged_mat = gmfile['ged_mat']
# total_time = gmfile['total_time']
# nb_cost_mat_list = gmfile['nb_cost_mat_list']
nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
print(nb_consistent, nb_inconsistent, ratio_consistent)
norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
plt.imshow(norm_dis_k_mat)
plt.colorbar()
plt.savefig('results/norm_dis_k_mat.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
plt.savefig('results/norm_dis_k_mat.unfitted.MUTAG' + '.png', format='png')
# plt.show()
plt.clf()
norm_ged_mat = normalize_distance_matrix(ged_mat)
plt.imshow(norm_ged_mat)
plt.colorbar()
plt.savefig('results/norm_ged_mat.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
plt.savefig('results/norm_ged_mat.unfitted.MUTAG' + '.png', format='png')
# plt.show()
plt.clf()
norm_diff = norm_ged_mat - norm_dis_k_mat
plt.imshow(norm_diff)
plt.colorbar()
plt.savefig('results/diff_mat_norm_ged_dis_k.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
plt.savefig('results/diff_mat_norm_ged_dis_k.unfitted.MUTAG' + '.png', format='png')
# plt.show()
plt.clf()
draw_count_bar(norm_diff)


def pairwise_substitution_consistence(mat1, mat2):
"""Count how often the relative order of two entries agrees between mat1
and mat2, over all pairs of upper-triangular entries.
"""
nb_consistent = 0
nb_inconsistent = 0
# the matrices are considered symmetric; take the same (upper) triangle of
# both so that entries at the same index refer to the same graph pair.
upper_tri1 = mat1[np.triu_indices_from(mat1)]
upper_tri2 = mat2[np.triu_indices_from(mat2)]
for i in tqdm(range(len(upper_tri1)), desc='computing consistence', file=sys.stdout):
for j in range(i, len(upper_tri1)):
if np.sign(upper_tri1[i] - upper_tri1[j]) == np.sign(upper_tri2[i] - upper_tri2[j]):
nb_consistent += 1
else:
nb_inconsistent += 1
return nb_consistent, nb_inconsistent, nb_consistent / (nb_consistent + nb_inconsistent)
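# A vectorized variant (illustrative sketch, not from the original file). It
# compares the sign pattern of all entry pairs at once; memory grows
# quadratically with the number of upper-triangular entries, so it only suits
# small matrices:
def pairwise_substitution_consistence_vec(mat1, mat2):
    v1 = mat1[np.triu_indices_from(mat1)]
    v2 = mat2[np.triu_indices_from(mat2)]
    s1 = np.sign(v1[:, None] - v1[None, :])
    s2 = np.sign(v2[:, None] - v2[None, :])
    agree = s1 == s2
    iu = np.triu_indices_from(agree)  # count each unordered pair once, as above
    nb_consistent = int(agree[iu].sum())
    total = len(iu[0])
    return nb_consistent, total - nb_consistent, nb_consistent / total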


def pairwise_substitution(mat):
# the matrix is considered symmetric.
upper_tri = mat[np.triu_indices_from(mat)]
sub_list = []
for i in tqdm(range(len(upper_tri)), desc='computing', file=sys.stdout):
for j in range(i, len(upper_tri)):
sub_list.append(upper_tri[i] - upper_tri[j])
return sub_list


def draw_count_bar(norm_diff):
import pandas
from collections import Counter, OrderedDict
norm_diff_cnt = norm_diff.flatten()
norm_diff_cnt = norm_diff_cnt * 10
norm_diff_cnt = np.floor(norm_diff_cnt)
norm_diff_cnt = Counter(norm_diff_cnt)
norm_diff_cnt = OrderedDict(sorted(norm_diff_cnt.items()))
df = pandas.DataFrame.from_dict(norm_diff_cnt, orient='index')
df.plot(kind='bar')


if __name__ == '__main__':
# test_anycosts()
# test_cs_leq_ci_plus_cr()
# test_unfitted()
# test_cs_leq_ci_plus_cr_python_bash_cpp()
# median_paper_clcpc_python_bash_cpp()
# median_paper_clcpc_python_best()

# x = np.array([[1,2,3],[4,5,6],[7,8,9]])
# xx = pairwise_substitution(x)
test_update_costs()

+ 0
- 520
gklearn/preimage/test_ged.py View File

@@ -1,520 +0,0 @@
#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/gedlibpy/lib/fann/:/export/home/lambertn/Documents/gedlibpy/lib/libsvm.3.22:/export/home/lambertn/Documents/gedlibpy/lib/nomad

#So that "import script" finds the libraries GedLib needs
#Equivalent to setting the LD_LIBRARY_PATH environment variable in a bash shell
#import gedlibpy_linlin.librariesImport
#from gedlibpy_linlin import gedlibpy
from libs import *
import networkx as nx
import numpy as np
from tqdm import tqdm
import sys


def test_NON_SYMBOLIC_cost():
"""Test edit cost LETTER2.
"""
from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

dataset = '../../datasets/Letter-high/Letter-high_A.txt'
Gn, y_all = loadDataset(dataset)

g1 = Gn[200]
g2 = Gn[1780]
reform_attributes(g1)
reform_attributes(g2)

c_vi = 0.675
c_vr = 0.675
c_vs = 0.75
c_ei = 0.425
c_er = 0.425
c_es = 0

edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
dis, pi_forward, pi_backward = GED(g1, g2, lib='gedlibpy',
cost='NON_SYMBOLIC', method='IPFP', edit_cost_constant=edit_cost_constant,
algo_options='', stabilizer=None)
n_vi, n_vr, sod_vs, n_ei, n_er, sod_es = get_nb_edit_operations_nonsymbolic(g1, g2,
pi_forward, pi_backward)

print('# of operations:', n_vi, n_vr, sod_vs, n_ei, n_er, sod_es)
print('c_vi, c_vr, c_vs, c_ei, c_er:', c_vi, c_vr, c_vs, c_ei, c_er, c_es)
cost_computed = c_vi * n_vi + c_vr * n_vr + c_vs * sod_vs \
+ c_ei * n_ei + c_er * n_er + c_es * sod_es
print('dis (cost computed by GED):', dis)
print('cost computed by # of operations and edit cost constants:', cost_computed)


def test_LETTER2_cost():
"""Test edit cost LETTER2.
"""
from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])

g1 = Gn[200]
g2 = Gn[1780]
reform_attributes(g1)
reform_attributes(g2)

c_vi = 0.675
c_vr = 0.675
c_vs = 0.75
c_ei = 0.425
c_er = 0.425

edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er]
dis, pi_forward, pi_backward = GED(g1, g2, dataset='letter', lib='gedlibpy',
cost='LETTER2', method='IPFP', edit_cost_constant=edit_cost_constant,
algo_options='', stabilizer=None)
n_vi, n_vr, n_vs, sod_vs, n_ei, n_er = get_nb_edit_operations_letter(g1, g2,
pi_forward, pi_backward)

print('# of operations:', n_vi, n_vr, n_vs, sod_vs, n_ei, n_er)
print('c_vi, c_vr, c_vs, c_ei, c_er:', c_vi, c_vr, c_vs, c_ei, c_er)
cost_computed = c_vi * n_vi + c_vr * n_vr + c_vs * sod_vs \
+ c_ei * n_ei + c_er * n_er
print('dis (cost computed by GED):', dis)
print('cost computed by # of operations and edit cost constants:', cost_computed)



def test_get_nb_edit_operations_letter():
"""Test whether function preimage.ged.get_nb_edit_operations_letter returns
correct numbers of edit operations. The distance/cost computed by GED
should be the same as the cost computed by number of operations and edit
cost constants.
"""
from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])

g1 = Gn[200]
g2 = Gn[1780]
reform_attributes(g1)
reform_attributes(g2)

c_vir = 0.9
c_eir = 1.7
alpha = 0.75

edit_cost_constant = [c_vir, c_eir, alpha]
dis, pi_forward, pi_backward = GED(g1, g2, dataset='letter', lib='gedlibpy',
cost='LETTER', method='IPFP', edit_cost_constant=edit_cost_constant,
algo_options='', stabilizer=None)
n_vi, n_vr, n_vs, c_vs, n_ei, n_er = get_nb_edit_operations_letter(g1, g2,
pi_forward, pi_backward)

print('# of operations and costs:', n_vi, n_vr, n_vs, c_vs, n_ei, n_er)
print('c_vir, c_eir, alpha:', c_vir, c_eir, alpha)
cost_computed = alpha * c_vir * (n_vi + n_vr) \
+ alpha * c_vs \
+ (1 - alpha) * c_eir * (n_ei + n_er)
print('dis (cost computed by GED):', dis)
print('cost computed by # of operations and edit cost constants:', cost_computed)
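# Note (added for readability; not in the original file): the identity checked
# here is the LETTER cost model exactly as instantiated above,
#     d_GED(g1, g2) = alpha * c_vir * (n_vi + n_vr) + alpha * c_vs
#                     + (1 - alpha) * c_eir * (n_ei + n_er),
# where c_vs is the accumulated node-substitution cost returned by
# get_nb_edit_operations_letter rather than a constant.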


def test_get_nb_edit_operations():
"""Test whether function preimage.ged.get_nb_edit_operations returns correct
numbers of edit operations. The distance/cost computed by GED should be the
same as the cost computed by number of operations and edit cost constants.
"""
from gklearn.preimage.ged import GED, get_nb_edit_operations
from gklearn.utils.graphfiles import loadDataset
import os

ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])

g1 = Gn[20]
g2 = Gn[108]

c_vi = 3
c_vr = 3
c_vs = 1
c_ei = 3
c_er = 3
c_es = 1

edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
dis, pi_forward, pi_backward = GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy',
cost='CONSTANT', method='IPFP', edit_cost_constant=edit_cost_constant,
algo_options='', stabilizer=None)
n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(g1, g2,
pi_forward, pi_backward)

print('# of operations and costs:', n_vi, n_vr, n_vs, n_ei, n_er, n_es)
print('edit costs:', c_vi, c_vr, c_vs, c_ei, c_er, c_es)
cost_computed = n_vi * c_vi + n_vr * c_vr + n_vs * c_vs \
+ n_ei * c_ei + n_er * c_er + n_es * c_es
print('dis (cost computed by GED):', dis)
print('cost computed by # of operations and edit cost constants:', cost_computed)
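# Note (added for readability; not in the original file): for the CONSTANT
# cost model this test asserts that the GED upper bound decomposes linearly
# over the counted edit operations,
#     d_GED(g1, g2) = n_vi * c_vi + n_vr * c_vr + n_vs * c_vs
#                     + n_ei * c_ei + n_er * c_er + n_es * c_es.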


def test_ged_python_bash_cpp():
"""Test ged computation with python invoking the c++ code by bash command (with updated library).
"""
from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.ged import GED

data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

Gn, y = loadDataset(collection_file, extra_params=graph_dir)

algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'

for repeat in range(0, 3):
# Generate the result file.
ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_bash_' + str(repeat) + '_init40.3_20.txt'
# runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_min_' + str(repeat) + '.txt'

ged_file = open(ged_filename, 'a')
# runtime_file = open(runtime_filename, 'a')

ged_mat = np.empty((len(Gn), len(Gn)))
# runtime_mat = np.empty((len(Gn), len(Gn)))

for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
for j in range(len(Gn)):
print(i, j)
g1 = Gn[i]
g2 = Gn[j]
upper_bound, _, _ = GED(g1, g2, lib='gedlib-bash', cost='CONSTANT',
method='IPFP',
edit_cost_constant=[3.0, 3.0, 1.0, 3.0, 3.0, 1.0],
algo_options=algo_options)
# runtime = gedlibpy.get_runtime(g1, g2)
ged_mat[i][j] = upper_bound
# runtime_mat[i][j] = runtime

# Write to files.
ged_file.write(str(int(upper_bound)) + ' ')
# runtime_file.write(str(runtime) + ' ')

ged_file.write('\n')
# runtime_file.write('\n')

ged_file.close()
# runtime_file.close()

print('ged_mat')
print(ged_mat)
# print('runtime_mat:')
# print(runtime_mat)

return



def test_ged_best_settings_updated():
"""Test ged computation with best settings the same as in the C++ code (with updated library).
"""

data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'

graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'

for repeat in range(0, 3):
# Generate the result file.
ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_updated_' + str(repeat) + '_init40.txt'
runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_updated_' + str(repeat) + '_init40.txt'

gedlibpy.restart_env()
gedlibpy.load_GXL_graphs(graph_dir, collection_file)
listID = gedlibpy.get_all_graph_ids()
gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
gedlibpy.init()
gedlibpy.set_method("IPFP", algo_options)
gedlibpy.init_method()

ged_mat = np.empty((len(listID), len(listID)))
runtime_mat = np.empty((len(listID), len(listID)))

for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
ged_file = open(ged_filename, 'a')
runtime_file = open(runtime_filename, 'a')

for j in range(len(listID)):
g1 = listID[i]
g2 = listID[j]
gedlibpy.run_method(g1, g2)
upper_bound = gedlibpy.get_upper_bound(g1, g2)
runtime = gedlibpy.get_runtime(g1, g2)
ged_mat[i][j] = upper_bound
runtime_mat[i][j] = runtime

# Write to files.
ged_file.write(str(int(upper_bound)) + ' ')
runtime_file.write(str(runtime) + ' ')

ged_file.write('\n')
runtime_file.write('\n')

ged_file.close()
runtime_file.close()

print('ged_mat')
print(ged_mat)
print('runtime_mat:')
print(runtime_mat)

return


def test_ged_best_settings():
"""Test ged computation with best settings the same as in the C++ code.
"""

data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'

for repeat in range(0, 3):
# Generate the result file.
ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_best_settings_' + str(repeat) + '.txt'
runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_best_settings_' + str(repeat) + '.txt'

ged_file = open(ged_filename, 'a')
runtime_file = open(runtime_filename, 'a')

gedlibpy.restart_env()
gedlibpy.load_GXL_graphs(graph_dir, collection_file)
listID = gedlibpy.get_all_graph_ids()
gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
gedlibpy.init()
gedlibpy.set_method("IPFP", algo_options)
gedlibpy.init_method()

ged_mat = np.empty((len(listID), len(listID)))
runtime_mat = np.empty((len(listID), len(listID)))

for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
for j in range(len(listID)):
g1 = listID[i]
g2 = listID[j]
gedlibpy.run_method(g1, g2)
upper_bound = gedlibpy.get_upper_bound(g1, g2)
runtime = gedlibpy.get_runtime(g1, g2)
ged_mat[i][j] = upper_bound
runtime_mat[i][j] = runtime

# Write to files.
ged_file.write(str(int(upper_bound)) + ' ')
runtime_file.write(str(runtime) + ' ')

ged_file.write('\n')
runtime_file.write('\n')

ged_file.close()
runtime_file.close()

print('ged_mat')
print(ged_mat)
print('runtime_mat:')
print(runtime_mat)

return



def test_ged_default():
"""Test ged computation with default settings.
"""

data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

for repeat in range(3):
# Generate the result file.
ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_default_' + str(repeat) + '.txt'
runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_default_' + str(repeat) + '.txt'

ged_file = open(ged_filename, 'a')
runtime_file = open(runtime_filename, 'a')

gedlibpy.restart_env()
gedlibpy.load_GXL_graphs(graph_dir, collection_file)
listID = gedlibpy.get_all_graph_ids()
gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
gedlibpy.init()
gedlibpy.set_method("IPFP", "")
gedlibpy.init_method()

ged_mat = np.empty((len(listID), len(listID)))
runtime_mat = np.empty((len(listID), len(listID)))

for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
for j in range(len(listID)):
g1 = listID[i]
g2 = listID[j]
gedlibpy.run_method(g1, g2)
upper_bound = gedlibpy.get_upper_bound(g1, g2)
runtime = gedlibpy.get_runtime(g1, g2)
ged_mat[i][j] = upper_bound
runtime_mat[i][j] = runtime

# Write to files.
ged_file.write(str(int(upper_bound)) + ' ')
runtime_file.write(str(runtime) + ' ')

ged_file.write('\n')
runtime_file.write('\n')

ged_file.close()
runtime_file.close()

print('ged_mat')
print(ged_mat)
print('runtime_mat:')
print(runtime_mat)

return


def test_ged_min():
"""Test ged computation with the "min" stabilizer.
"""
from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.ged import GED

data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

Gn, y = loadDataset(collection_file, extra_params=graph_dir)

# algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'

for repeat in range(0, 3):
# Generate the result file.
ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_min_' + str(repeat) + '.txt'
# runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_min_' + str(repeat) + '.txt'

ged_file = open(ged_filename, 'a')
# runtime_file = open(runtime_filename, 'a')

ged_mat = np.empty((len(Gn), len(Gn)))
# runtime_mat = np.empty((len(Gn), len(Gn)))

for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
for j in range(len(Gn)):
g1 = Gn[i]
g2 = Gn[j]
upper_bound, _, _ = GED(g1, g2, lib='gedlibpy', cost='CONSTANT',
method='IPFP',
edit_cost_constant=[3.0, 3.0, 1.0, 3.0, 3.0, 1.0],
stabilizer='min', repeat=10)
# runtime = gedlibpy.get_runtime(g1, g2)
ged_mat[i][j] = upper_bound
# runtime_mat[i][j] = runtime

# Write to files.
ged_file.write(str(int(upper_bound)) + ' ')
# runtime_file.write(str(runtime) + ' ')

ged_file.write('\n')
# runtime_file.write('\n')

ged_file.close()
# runtime_file.close()

print('ged_mat')
print(ged_mat)
# print('runtime_mat:')
# print(runtime_mat)

return


def init():
print("List of Edit Cost Options:")
for i in gedlibpy.list_of_edit_cost_options:
print(i)
print("")

print("List of Method Options:")
for j in gedlibpy.list_of_method_options:
print(j)
print("")

print("List of Init Options:")
for k in gedlibpy.list_of_init_options:
print(k)
print("")




def convertGraph(G):
G_new = nx.Graph()
for nd, attrs in G.nodes(data=True):
G_new.add_node(str(nd), chem=attrs['atom'])
for nd1, nd2, attrs in G.edges(data=True):
G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])

return G_new
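

# Usage note: convertGraph relabels the dataset attributes ('atom',
# 'bond_type') to the names GEDLIB's CHEM_1 edit costs expect ('chem',
# 'valence') and stringifies the node ids. A two-node sketch (values
# illustrative):
#
#   g = nx.Graph()
#   g.add_node(0, atom='C'); g.add_node(1, atom='O')
#   g.add_edge(0, 1, bond_type='1')
#   convertGraph(g).nodes(data=True)
#   # -> [('0', {'chem': 'C'}), ('1', {'chem': 'O'})]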


def testNxGrapĥ():
from gklearn.utils.graphfiles import loadDataset
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

gedlibpy.restart_env()
for graph in Gn:
g_new = convertGraph(graph)
gedlibpy.add_nx_graph(g_new, "")

listID = gedlibpy.get_all_graph_ids()
gedlibpy.set_edit_cost("CHEM_1")
gedlibpy.init()
gedlibpy.set_method("IPFP", "")
gedlibpy.init_method()

print(listID)
g = listID[0]
h = listID[1]

gedlibpy.run_method(g, h)

print("Node Map : ", gedlibpy.get_node_map(g, h))
print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))

if __name__ == '__main__':
# test_ged_default()
# test_ged_min()
# test_ged_best_settings()
# test_ged_best_settings_updated()
# test_ged_python_bash_cpp()
# test_get_nb_edit_operations()
# test_get_nb_edit_operations_letter()
# test_LETTER2_cost()
test_NON_SYMBOLIC_cost()


#init()
#testNxGrapĥ()

+ 0
- 964
gklearn/preimage/test_iam.py View File

@@ -1,964 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import os  # used below to locate the generated monoterpenoides collections
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

from gklearn.utils.graphfiles import loadDataset
#from gklearn.utils.logger2file import *
from gklearn.preimage.iam import iam_upgraded
from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
#from gklearn.preimage.ged import ged_median


def test_iam_monoterpenoides_with_init40():
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'
# unfitted edit costs.
c_vi = 3
c_vr = 3
c_vs = 1
c_ei = 3
c_er = 3
c_es = 1
ite_max_iam = 50
epsilon_iam = 0.0001
removeNodes = False
connected_iam = False
# parameters for IAM function
# ged_cost = 'CONSTANT'
ged_cost = 'CONSTANT'
ged_method = 'IPFP'
edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
ged_stabilizer = None
# ged_repeat = 50
algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
'edit_cost_constant': edit_cost_constant,
'algo_options': algo_options,
'stabilizer': ged_stabilizer}

collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
# classify graphs according to classes.
time_list = []
dis_ks_min_list = []
dis_ks_set_median_list = []
sod_gs_list = []
g_best = []
sod_set_median_list = []
sod_list_list = []
for y in y_all:
print('\n-------------------------------------------------------')
print('class of y:', y)
time_list.append([])
dis_ks_min_list.append([])
dis_ks_set_median_list.append([])
sod_gs_list.append([])
g_best.append([])
sod_set_median_list.append([])
for repeat in range(repeats):
# load median set.
collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
Gn_median, _ = loadDataset(collection_file, extra_params=graph_dir)
Gn_candidate = [g.copy() for g in Gn_median]
time0 = time.time()
G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
= iam_upgraded(Gn_median,
Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
epsilon=epsilon_iam, node_label=node_label, edge_label=edge_label,
connected=connected_iam, removeNodes=removeNodes,
params_ged=params_ged)
time_total = time.time() - time0
print('\ntime: ', time_total)
time_list[-1].append(time_total)
g_best[-1].append(G_gen_median_list[0])
sod_set_median_list[-1].append(sod_set_median)
print('\nsmallest sod of the set median:', sod_set_median)
sod_gs_list[-1].append(sod_gen_median)
print('\nsmallest sod in graph space:', sod_gen_median)
sod_list_list.append(sod_list)
# # show the best graph and save it to file.
# print('one of the possible corresponding pre-images is')
# nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
# with_labels=True)
## plt.show()
# # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
## plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
## '_repeat' + str(repeat) + '_' + str(time.time()) +
## '.png', format="PNG")
# plt.clf()
# # print(G_gen_median_list[0].nodes(data=True))
# # print(G_gen_median_list[0].edges(data=True))
print('\nsods of the set median for this class:', sod_set_median_list[-1])
print('\nsods in graph space for this class:', sod_gs_list[-1])
# print('\ndistance in kernel space of set median for this class:',
# dis_ks_set_median_list[-1])
# print('\nsmallest distances in kernel space for this class:',
# dis_ks_min_list[-1])
print('\ntimes for this class:', time_list[-1])
sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
sod_gs_list[-1] = np.mean(sod_gs_list[-1])
# dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
# dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
time_list[-1] = np.mean(time_list[-1])
print()
print('\nmean sods of the set median for each class:', sod_set_median_list)
print('\nmean sods in graph space for each class:', sod_gs_list)
# print('\ndistances in kernel space of set median for each class:',
# dis_ks_set_median_list)
# print('\nmean smallest distances in kernel space for each class:',
# dis_ks_min_list)
print('\nmean times for each class:', time_list)
print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
# print('\nmean distances in kernel space of set median of all:',
# np.mean(dis_ks_set_median_list))
# print('\nmean smallest distances in kernel space of all:',
# np.mean(dis_ks_min_list))
print('\nmean times of all:', np.mean(time_list))
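
# Note: iam_upgraded, as unpacked above, returns a 5-tuple: the generalized-
# median candidates, the SOD of the generalized median, the SOD history over
# the IAM iterations, the set-median candidates, and the SOD of the set
# median. This wording paraphrases the variable names used throughout this file.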




def test_iam_monoterpenoides():
ds = {'name': 'monoterpenoides',
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'
# parameters for GED function from the IAM paper.
# fitted edit costs (Gaussian).
c_vi = 0.03620133402089074
c_vr = 0.0417574590207099
c_vs = 0.009992282328587499
c_ei = 0.08293120042342755
c_er = 0.09512220476358019
c_es = 0.09222529696841467
# # fitted edit costs (linear combinations).
# c_vi = 0.1749684054238749
# c_vr = 0.0734054228711457
# c_vs = 0.05017781726016715
# c_ei = 0.1869431164806936
# c_er = 0.32055856948274
# c_es = 0.2569469379247611
# # unfitted edit costs.
# c_vi = 3
# c_vr = 3
# c_vs = 1
# c_ei = 3
# c_er = 3
# c_es = 1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = False
connected_iam = False
# parameters for IAM function
# ged_cost = 'CONSTANT'
ged_cost = 'CONSTANT'
ged_method = 'IPFP'
edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
# edit_cost_constant = []
ged_stabilizer = 'min'
ged_repeat = 50
params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
'edit_cost_constant': edit_cost_constant,
'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
# classify graphs according to class labels.
time_list = []
dis_ks_min_list = []
dis_ks_set_median_list = []
sod_gs_list = []
g_best = []
sod_set_median_list = []
sod_list_list = []
idx_dict = get_same_item_indices(y_all)
for y_class in idx_dict:
print('\n-------------------------------------------------------')
print('class of y:', y_class)
Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
time_list.append([])
dis_ks_min_list.append([])
dis_ks_set_median_list.append([])
sod_gs_list.append([])
g_best.append([])
sod_set_median_list.append([])
for repeat in range(50):
idx_rdm = random.sample(range(len(Gn_class)), 10)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
Gn_candidate = [g.copy() for g in Gn_median]
alpha_range = [1 / len(Gn_median)] * len(Gn_median)
time0 = time.time()
G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
= iam_upgraded(Gn_median,
Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
params_ged=params_ged)
time_total = time.time() - time0
print('\ntime: ', time_total)
time_list[-1].append(time_total)
g_best[-1].append(G_gen_median_list[0])
sod_set_median_list[-1].append(sod_set_median)
print('\nsmallest sod of the set median:', sod_set_median)
sod_gs_list[-1].append(sod_gen_median)
print('\nsmallest sod in graph space:', sod_gen_median)
sod_list_list.append(sod_list)
# show the best graph and save it to file.
print('one of the possible corresponding pre-images is')
nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
with_labels=True)
# plt.show()
# plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
# plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
# '_repeat' + str(repeat) + '_' + str(time.time()) +
# '.png', format="PNG")
plt.clf()
# print(G_gen_median_list[0].nodes(data=True))
# print(G_gen_median_list[0].edges(data=True))
# compute distance between \psi and the set median graph.
knew_set_median = compute_kernel(G_set_median_list + Gn_median,
gkernel, node_label, edge_label, False)
dhat_new_set_median_list = []
for idx, g_tmp in enumerate(G_set_median_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
len(G_set_median_list) + len(Gn_median) + 1),
alpha_range, knew_set_median, withterm3=False))
print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
# compute distance between \psi and the new generated graphs.
knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
edge_label, False)
dhat_new_list = []
for idx, g_tmp in enumerate(G_gen_median_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
len(G_gen_median_list) + len(Gn_median) + 1),
alpha_range, knew, withterm3=False))
print('\nsmallest distance in kernel space: ', dhat_new_list[0])
dis_ks_min_list[-1].append(dhat_new_list[0])

print('\nsods of the set median for this class:', sod_set_median_list[-1])
print('\nsods in graph space for this class:', sod_gs_list[-1])
print('\ndistance in kernel space of set median for this class:',
dis_ks_set_median_list[-1])
print('\nsmallest distances in kernel space for this class:',
dis_ks_min_list[-1])
print('\ntimes for this class:', time_list[-1])
sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
sod_gs_list[-1] = np.mean(sod_gs_list[-1])
dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
time_list[-1] = np.mean(time_list[-1])
print()
print('\nmean sods of the set median for each class:', sod_set_median_list)
print('\nmean sods in graph space for each class:', sod_gs_list)
print('\ndistances in kernel space of set median for each class:',
dis_ks_set_median_list)
print('\nmean smallest distances in kernel space for each class:',
dis_ks_min_list)
print('\nmean times for each class:', time_list)
print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
print('\nmean distances in kernel space of set median of all:',
np.mean(dis_ks_set_median_list))
print('\nmean smallest distances in kernel space of all:',
np.mean(dis_ks_min_list))
print('\nmean times of all:', np.mean(time_list))
nb_better_sods = 0
nb_worse_sods = 0
nb_same_sods = 0
for sods in sod_list_list:
if sods[0] > sods[-1]:
nb_better_sods += 1
elif sods[0] < sods[-1]:
nb_worse_sods += 1
else:
nb_same_sods += 1
print('\nIn', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
'are getting better,', str(nb_worse_sods), 'are getting worse and',
str(nb_same_sods), 'are unchanged;', str(nb_better_sods / len(sod_list_list)),
'of the sods are improved.')
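# The tally above compares the first and last entries of each SOD history:
# a run counts as improved when the SOD decreased over the IAM iterations.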


def test_iam_mutag():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'
# parameters for GED function from the IAM paper.
# fitted edit costs.
c_vi = 0.03523843108436513
c_vr = 0.03347339739350128
c_vs = 0.06871290673612238
c_ei = 0.08591999846720685
c_er = 0.07962086440894103
c_es = 0.08596855855478233
# unfitted edit costs.
# c_vi = 3
# c_vr = 3
# c_vs = 1
# c_ei = 3
# c_er = 3
# c_es = 1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = False
connected_iam = False
# parameters for IAM function
# ged_cost = 'CONSTANT'
ged_cost = 'CONSTANT'
ged_method = 'IPFP'
edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
# edit_cost_constant = []
ged_stabilizer = 'min'
ged_repeat = 50
params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
'edit_cost_constant': edit_cost_constant,
'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
# classify graphs according to class labels.
time_list = []
dis_ks_min_list = []
dis_ks_set_median_list = []
sod_gs_list = []
g_best = []
sod_set_median_list = []
sod_list_list = []
idx_dict = get_same_item_indices(y_all)
for y_class in idx_dict:
print('\n-------------------------------------------------------')
print('class of y:', y_class)
Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
time_list.append([])
dis_ks_min_list.append([])
dis_ks_set_median_list.append([])
sod_gs_list.append([])
g_best.append([])
sod_set_median_list.append([])
for repeat in range(50):
idx_rdm = random.sample(range(len(Gn_class)), 10)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
Gn_candidate = [g.copy() for g in Gn_median]
alpha_range = [1 / len(Gn_median)] * len(Gn_median)
time0 = time.time()
G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
= iam_upgraded(Gn_median,
Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
params_ged=params_ged)
time_total = time.time() - time0
print('\ntime: ', time_total)
time_list[-1].append(time_total)
g_best[-1].append(G_gen_median_list[0])
sod_set_median_list[-1].append(sod_set_median)
print('\nsmallest sod of the set median:', sod_set_median)
sod_gs_list[-1].append(sod_gen_median)
print('\nsmallest sod in graph space:', sod_gen_median)
sod_list_list.append(sod_list)
# show the best graph and save it to file.
print('one of the possible corresponding pre-images is')
nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
with_labels=True)
# plt.show()
# plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
# plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) +
# '_repeat' + str(repeat) + '_' + str(time.time()) +
# '.png', format="PNG")
plt.clf()
# print(G_gen_median_list[0].nodes(data=True))
# print(G_gen_median_list[0].edges(data=True))
# compute distance between \psi and the set median graph.
knew_set_median = compute_kernel(G_set_median_list + Gn_median,
gkernel, node_label, edge_label, False)
dhat_new_set_median_list = []
for idx, g_tmp in enumerate(G_set_median_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
len(G_set_median_list) + len(Gn_median) + 1),
alpha_range, knew_set_median, withterm3=False))
print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
# compute distance between \psi and the new generated graphs.
knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
edge_label, False)
dhat_new_list = []
for idx, g_tmp in enumerate(G_gen_median_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
len(G_gen_median_list) + len(Gn_median) + 1),
alpha_range, knew, withterm3=False))
print('\nsmallest distance in kernel space: ', dhat_new_list[0])
dis_ks_min_list[-1].append(dhat_new_list[0])

print('\nsods of the set median for this class:', sod_set_median_list[-1])
print('\nsods in graph space for this class:', sod_gs_list[-1])
print('\ndistance in kernel space of set median for this class:',
dis_ks_set_median_list[-1])
print('\nsmallest distances in kernel space for this class:',
dis_ks_min_list[-1])
print('\ntimes for this class:', time_list[-1])
sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
sod_gs_list[-1] = np.mean(sod_gs_list[-1])
dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
time_list[-1] = np.mean(time_list[-1])
print()
print('\nmean sods of the set median for each class:', sod_set_median_list)
print('\nmean sods in graph space for each class:', sod_gs_list)
print('\ndistances in kernel space of set median for each class:',
dis_ks_set_median_list)
print('\nmean smallest distances in kernel space for each class:',
dis_ks_min_list)
print('\nmean times for each class:', time_list)
print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
print('\nmean distances in kernel space of set median of all:',
np.mean(dis_ks_set_median_list))
print('\nmean smallest distances in kernel space of all:',
np.mean(dis_ks_min_list))
print('\nmean times of all:', np.mean(time_list))
nb_better_sods = 0
nb_worse_sods = 0
nb_same_sods = 0
for sods in sod_list_list:
if sods[0] > sods[-1]:
nb_better_sods += 1
elif sods[0] < sods[-1]:
nb_worse_sods += 1
else:
nb_same_sods += 1
print('\nIn', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
'are getting better,', str(nb_worse_sods), 'are getting worse and',
str(nb_same_sods), 'are unchanged;', str(nb_better_sods / len(sod_list_list)),
'of the sods are improved.')

###############################################################################
# tests on different numbers of median-sets.

def test_iam_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
# # parameters for GED function
# c_vi = 0.037
# c_vr = 0.038
# c_vs = 0.075
# c_ei = 0.001
# c_er = 0.001
# c_es = 0.0
# ite_max_iam = 50
# epsilon_iam = 0.001
# removeNodes = False
# connected_iam = False
# # parameters for IAM function
# ged_cost = 'CONSTANT'
# ged_method = 'IPFP'
# edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
# ged_stabilizer = 'min'
# ged_repeat = 50
# params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
# 'edit_cost_constant': edit_cost_constant,
# 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
# parameters for GED function
c_vi = 4
c_vr = 4
c_vs = 2
c_ei = 1
c_er = 1
c_es = 1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = False
connected_iam = False
# parameters for IAM function
ged_cost = 'CHEM_1'
ged_method = 'IPFP'
edit_cost_constant = []
ged_stabilizer = 'min'
ged_repeat = 50
params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
'edit_cost_constant': edit_cost_constant,
'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# number of graphs whose median we want to compute.
# nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
nb_median_range = [len(Gn)]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
# sod_gs_min_list = []
# nb_updated_list = []
# nb_updated_k_list = []
g_best = []
for nb_median in nb_median_range:
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
Gn_candidate = [g.copy() for g in Gn]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
# gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
# km_tmp = gmfile['gm']
# time_km = gmfile['gmtime']
# # modify mixed gram matrix.
# km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
# for i in range(len(Gn)):
# for j in range(i, len(Gn)):
# km[i, j] = km_tmp[i, j]
# km[j, i] = km[i, j]
# for i in range(len(Gn)):
# for j, idx in enumerate(idx_rdm):
# km[i, len(Gn) + j] = km[i, idx]
# km[len(Gn) + j, i] = km[i, idx]
# for i, idx1 in enumerate(idx_rdm):
# for j, idx2 in enumerate(idx_rdm):
# km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time0 = time.time()
# iam_upgraded returns a 5-tuple; keep the gen-median list and its SOD here.
ghat_new_list, sod_min, _, _, _ = iam_upgraded(Gn_median, Gn_candidate,
c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
params_ged=params_ged)
time_total = time.time() - time0
print('\ntime: ', time_total)
time_list.append(time_total)
# compute distance between \psi and the new generated graphs.
knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
dhat_new_list = []
for idx, g_tmp in enumerate(ghat_new_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
len(ghat_new_list) + len(Gn_median) + 1),
alpha_range, knew, withterm3=False))
print('\nsmallest distance in kernel space: ', dhat_new_list[0])
dis_ks_min_list.append(dhat_new_list[0])
g_best.append(ghat_new_list[0])
# show the best graph and save it to file.
# print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
with_labels=True)
plt.show()
# plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
'.png', format="PNG")
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
sod_gs_list.append(sod_min)
# sod_gs_min_list.append(np.min(sod_min))
print('\nsmallest sod in graph space: ', sod_min)
print('\nsods in graph space: ', sod_gs_list)
# print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs: ',
dis_ks_min_list)
# print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
# nb_updated_list)
# print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
# nb_updated_k_list)
print('\ntimes:', time_list)


def test_iam_letter_h():
from gklearn.preimage.median import draw_Letter_graph
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
# 'extra_params': {}} # node nsymb
# Gn = Gn[0:50]
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
gkernel = 'structuralspkernel'
# parameters for GED function from the IAM paper.
c_vi = 3
c_vr = 3
c_vs = 1
c_ei = 3
c_er = 3
c_es = 1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = False
connected_iam = False
# parameters for IAM function
# ged_cost = 'CONSTANT'
ged_cost = 'LETTER'
ged_method = 'IPFP'
# edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
edit_cost_constant = []
ged_stabilizer = 'min'
ged_repeat = 50
params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
'edit_cost_constant': edit_cost_constant,
'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
# classify graphs according to letters.
time_list = []
dis_ks_min_list = []
sod_gs_list = []
g_best = []
sod_set_median_list = []
idx_dict = get_same_item_indices(y_all)
for letter in idx_dict:
print('\n-------------------------------------------------------')
print('letter', letter)
Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
time_list.append([])
dis_ks_min_list.append([])
sod_gs_list.append([])
g_best.append([])
sod_set_median_list.append([])
for repeat in range(50):
idx_rdm = random.sample(range(len(Gn_let)), 50)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
Gn_candidate = [g.copy() for g in Gn_median]
alpha_range = [1 / len(Gn_median)] * len(Gn_median)
time0 = time.time()
# iam_upgraded returns a 5-tuple; keep the gen-median list and both SODs here.
ghat_new_list, sod_min, _, _, sod_set_median = iam_upgraded(Gn_median,
Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
params_ged=params_ged)
time_total = time.time() - time0
print('\ntime: ', time_total)
time_list[-1].append(time_total)
g_best[-1].append(ghat_new_list[0])
sod_set_median_list[-1].append(sod_set_median)
print('\nsmallest sod of the set median:', sod_set_median)
sod_gs_list[-1].append(sod_min)
print('\nsmallest sod in graph space:', sod_min)
# show the best graph and save it to file.
print('one of the possible corresponding pre-images is')
draw_Letter_graph(ghat_new_list[0], savepath='results/iam/paper_compare/')
# compute distance between \psi and the new generated graphs.
knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
dhat_new_list = []
for idx, g_tmp in enumerate(ghat_new_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
len(ghat_new_list) + len(Gn_median) + 1),
alpha_range, knew, withterm3=False))
print('\nsmallest distance in kernel space: ', dhat_new_list[0])
dis_ks_min_list[-1].append(dhat_new_list[0])
print('\nsods of the set median for this letter:', sod_set_median_list[-1])
print('\nsods in graph space for this letter:', sod_gs_list[-1])
print('\nsmallest distances in kernel space for this letter:',
dis_ks_min_list[-1])
print('\ntimes for this letter:', time_list[-1])
sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
sod_gs_list[-1] = np.mean(sod_gs_list[-1])
dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
time_list[-1] = np.mean(time_list[-1])
print('\nmean sods of the set median for each letter:', sod_set_median_list)
print('\nmean sods in graph space for each letter:', sod_gs_list)
print('\nmean smallest distances in kernel space for each letter:',
dis_ks_min_list)
print('\nmean times for each letter:', time_list)
print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
print('\nmean smallest distances in kernel space of all:',
np.mean(dis_ks_min_list))
print('\nmean times of all:', np.mean(time_list))




def test_iam_fitdistance():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
# remove_edges(Gn)
gkernel = 'marginalizedkernel'
node_label = 'atom'
edge_label = 'bond_type'
# lmbda = 0.03 # termination probability
# # parameters for GED function
# c_vi = 0.037
# c_vr = 0.038
# c_vs = 0.075
# c_ei = 0.001
# c_er = 0.001
# c_es = 0.0
# ite_max_iam = 50
# epsilon_iam = 0.001
# removeNodes = False
# connected_iam = False
# # parameters for IAM function
# ged_cost = 'CONSTANT'
# ged_method = 'IPFP'
# edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
# ged_stabilizer = 'min'
# ged_repeat = 50
# params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
# 'edit_cost_constant': edit_cost_constant,
# 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
# parameters for GED function
c_vi = 4
c_vr = 4
c_vs = 2
c_ei = 1
c_er = 1
c_es = 1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = False
connected_iam = False
# parameters for IAM function
ged_cost = 'CHEM_1'
ged_method = 'IPFP'
edit_cost_constant = []
ged_stabilizer = 'min'
ged_repeat = 50
params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
'edit_cost_constant': edit_cost_constant,
'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# number of graphs whose median we want to compute.
# nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
nb_median_range = [10]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
dis_ks_gen_median_list = []
sod_gs_list = []
# sod_gs_min_list = []
# nb_updated_list = []
# nb_updated_k_list = []
g_best = []
for nb_median in nb_median_range:
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
Gn_candidate = [g.copy() for g in Gn_median]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
# gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
# km_tmp = gmfile['gm']
# time_km = gmfile['gmtime']
# # modify mixed gram matrix.
# km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
# for i in range(len(Gn)):
# for j in range(i, len(Gn)):
# km[i, j] = km_tmp[i, j]
# km[j, i] = km[i, j]
# for i in range(len(Gn)):
# for j, idx in enumerate(idx_rdm):
# km[i, len(Gn) + j] = km[i, idx]
# km[len(Gn) + j, i] = km[i, idx]
# for i, idx1 in enumerate(idx_rdm):
# for j, idx2 in enumerate(idx_rdm):
# km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time0 = time.time()
G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
= iam_upgraded(Gn_median, Gn_candidate,
c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
params_ged=params_ged)
time_total = time.time() - time0
print('\ntime: ', time_total)
time_list.append(time_total)
# compute distance between \psi and the new generated graphs.
knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
edge_label, False)
dhat_new_list = []
for idx, g_tmp in enumerate(G_gen_median_list):
# @todo: the term3 below could use the one at the beginning of the function.
dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
len(G_gen_median_list) + len(Gn_median) + 1),
alpha_range, knew, withterm3=False))
print('\nsmallest distance in kernel space: ', dhat_new_list[0])
dis_ks_min_list.append(dhat_new_list[0])
g_best.append(G_gen_median_list[0])
# show the best graph and save it to file.
# print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
with_labels=True)
plt.show()
# plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
# plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
# '.png', format="PNG")
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
sod_gs_list.append(sod_gen_median)
# sod_gs_min_list.append(np.min(sod_gen_median))
print('\nsmallest sod in graph space: ', sod_gen_median)
print('\nsmallest sod of set median in graph space: ', sod_set_median)
print('\nsods in graph space: ', sod_gs_list)
# print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs: ',
dis_ks_min_list)
# print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
# nb_updated_list)
# print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
# nb_updated_k_list)
print('\ntimes:', time_list)

###############################################################################

if __name__ == '__main__':
###############################################################################
# tests on different numbers of median-sets.
# test_iam_median_nb()
# test_iam_letter_h()
# test_iam_monoterpenoides()
# test_iam_mutag()
# test_iam_fitdistance()
# print("test log")
test_iam_monoterpenoides_with_init40()

+ 0
- 462
gklearn/preimage/test_k_closest_graphs.py View File

@@ -1,462 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 16 11:53:54 2019

@author: ljia
"""
import os   # used below to locate the generated monoterpenoides collections
import sys  # used for tqdm's file=sys.stdout
import numpy as np
import math
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
from tqdm import tqdm
from itertools import combinations, islice
import multiprocessing
from multiprocessing import Pool
from functools import partial

from gklearn.utils.graphfiles import loadDataset, loadGXL
#from gklearn.utils.logger2file import *
from gklearn.preimage.iam import iam_upgraded, iam_bash
from gklearn.preimage.utils import compute_kernel, dis_gstar, kernel_distance_matrix
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
#from gklearn.preimage.ged import ged_median


def fit_edit_cost_constants(fit_method, edit_cost_name,
edit_cost_constants=None, initial_solutions=1,
Gn_median=None, node_label=None, edge_label=None,
gkernel=None, dataset=None, init_ecc=None,
Gn=None, Kmatrix_median=None):
"""fit edit cost constants.
"""
if fit_method == 'random': # random
if edit_cost_name == 'LETTER':
edit_cost_constants = random.sample(range(1, 10), 3)
edit_cost_constants = [item * 0.1 for item in edit_cost_constants]
elif edit_cost_name == 'LETTER2':
random.seed(time.time())
edit_cost_constants = random.sample(range(1, 10), 5)
# edit_cost_constants = [item * 0.1 for item in edit_cost_constants]
elif edit_cost_name == 'NON_SYMBOLIC':
edit_cost_constants = random.sample(range(1, 10), 6)
if Gn_median[0].graph['node_attrs'] == []:
edit_cost_constants[2] = 0
if Gn_median[0].graph['edge_attrs'] == []:
edit_cost_constants[5] = 0
else:
edit_cost_constants = random.sample(range(1, 10), 6)
print('edit cost constants used:', edit_cost_constants)
elif fit_method == 'expert': # expert
if init_ecc is None:
if edit_cost_name == 'LETTER':
edit_cost_constants = [0.9, 1.7, 0.75]
elif edit_cost_name == 'LETTER2':
edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
else:
edit_cost_constants = [3, 3, 1, 3, 3, 1]
else:
edit_cost_constants = init_ecc
elif fit_method == 'k-graphs':
itr_max = 6
if init_ecc is None:
if edit_cost_name == 'LETTER':
init_costs = [0.9, 1.7, 0.75]
elif edit_cost_name == 'LETTER2':
init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
elif edit_cost_name == 'NON_SYMBOLIC':
init_costs = [0, 0, 1, 1, 1, 0]
if Gn_median[0].graph['node_attrs'] == []:
init_costs[2] = 0
if Gn_median[0].graph['edge_attrs'] == []:
init_costs[5] = 0
else:
init_costs = [3, 3, 1, 3, 3, 1]
else:
init_costs = init_ecc
algo_options = '--threads 1 --initial-solutions ' \
+ str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': edit_cost_name, 'method': 'IPFP',
'algo_options': algo_options, 'stabilizer': None}
# fit on k-graph subset
edit_cost_constants, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn_median,
node_label, edge_label, gkernel, itr_max, params_ged=params_ged,
init_costs=init_costs, dataset=dataset, Kmatrix=Kmatrix_median,
parallel=True)
elif fit_method == 'whole-dataset':
itr_max = 6
if init_ecc is None:
if edit_cost_name == 'LETTER':
init_costs = [0.9, 1.7, 0.75]
elif edit_cost_name == 'LETTER2':
init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
else:
init_costs = [3, 3, 1, 3, 3, 1]
else:
init_costs = init_ecc
algo_options = '--threads 1 --initial-solutions ' \
+ str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': edit_cost_name, 'method': 'IPFP',
'algo_options': algo_options, 'stabilizer': None}
# fit on all subset
edit_cost_constants, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn,
node_label, edge_label, gkernel, itr_max, params_ged=params_ged,
init_costs=init_costs, dataset=dataset, parallel=True)
elif fit_method == 'precomputed':
pass
return edit_cost_constants
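

# A minimal usage sketch (values illustrative, taken from the defaults above):
#
#   ecc = fit_edit_cost_constants('expert', 'LETTER2')
#   # -> [0.675, 0.675, 0.75, 0.425, 0.425]
#
# With fit_method='k-graphs' the constants are instead optimized on Gn_median
# via fit_GED_to_kernel_distance, so Gn_median, node_label, edge_label and
# gkernel must also be supplied.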


def compute_distances_to_true_median(Gn_median, fname_sm, fname_gm,
gkernel, edit_cost_name,
Kmatrix_median=None):
# reform graphs.
set_median = loadGXL(fname_sm)
gen_median = loadGXL(fname_gm)
# print(gen_median.nodes(data=True))
# print(gen_median.edges(data=True))
if edit_cost_name in ('LETTER', 'LETTER2', 'NON_SYMBOLIC'):
# dataset == 'Fingerprint':
# for g in Gn_median:
# reform_attributes(g)
reform_attributes(set_median, Gn_median[0].graph['node_attrs'],
Gn_median[0].graph['edge_attrs'])
reform_attributes(gen_median, Gn_median[0].graph['node_attrs'],
Gn_median[0].graph['edge_attrs'])
if edit_cost_name in ('LETTER', 'LETTER2', 'NON_SYMBOLIC'):
node_label = None
edge_label = None
else:
node_label = 'chem'
edge_label = 'valence'
# compute Gram matrix for median set.
if Kmatrix_median is None:
Kmatrix_median = compute_kernel(Gn_median, gkernel, node_label, edge_label, False)
# compute distance in kernel space for set median.
kernel_sm = []
for G_median in Gn_median:
km_tmp = compute_kernel([set_median, G_median], gkernel, node_label, edge_label, False)
kernel_sm.append(km_tmp[0, 1])
Kmatrix_sm = np.concatenate((np.array([kernel_sm]), np.copy(Kmatrix_median)), axis=0)
Kmatrix_sm = np.concatenate((np.array([[km_tmp[0, 0]] + kernel_sm]).T, Kmatrix_sm), axis=1)
# Kmatrix_sm = compute_kernel([set_median] + Gn_median, gkernel,
# node_label, edge_label, False)
dis_k_sm = dis_gstar(0, range(1, 1+len(Gn_median)),
[1 / len(Gn_median)] * len(Gn_median), Kmatrix_sm, withterm3=False)
# print(gen_median.nodes(data=True))
# print(gen_median.edges(data=True))
# print(set_median.nodes(data=True))
# print(set_median.edges(data=True))
# compute distance in kernel space for generalized median.
kernel_gm = []
for G_median in Gn_median:
km_tmp = compute_kernel([gen_median, G_median], gkernel, node_label, edge_label, False)
kernel_gm.append(km_tmp[0, 1])
Kmatrix_gm = np.concatenate((np.array([kernel_gm]), np.copy(Kmatrix_median)), axis=0)
Kmatrix_gm = np.concatenate((np.array([[km_tmp[0, 0]] + kernel_gm]).T, Kmatrix_gm), axis=1)
# Kmatrix_gm = compute_kernel([gen_median] + Gn_median, gkernel,
# node_label, edge_label, False)
dis_k_gm = dis_gstar(0, range(1, 1+len(Gn_median)),
[1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False)
# compute distance in kernel space for each graph in median set.
dis_k_gi = []
for idx in range(len(Gn_median)):
dis_k_gi.append(dis_gstar(idx+1, range(1, 1+len(Gn_median)),
[1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False))

print('dis_k_sm:', dis_k_sm)
print('dis_k_gm:', dis_k_gm)
print('dis_k_gi:', dis_k_gi)
idx_dis_k_gi_min = np.argmin(dis_k_gi)
dis_k_gi_min = dis_k_gi[idx_dis_k_gi_min]
print('min dis_k_gi:', dis_k_gi_min)
return dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min
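
# Note on the construction above: instead of recomputing a full Gram matrix,
# the kernel values between the median and each graph of the median set are
# computed pairwise and prepended as a new first row and column of the cached
# Kmatrix_median, so that dis_gstar can treat the median as graph 0 of the
# augmented matrix.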


def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_method,
graph_dir=None, initial_solutions=1,
edit_cost_constants=None, group_min=None,
dataset=None, edit_cost_name=None, init_ecc=None,
Kmatrix=None, parallel=True):
# dataset = dataset.lower()
# # compute distances in kernel space.
# dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
# Kmatrix=None, gkernel=gkernel)
# # ged.
# gmfile = np.load('results/test_k_closest_graphs/ged_mat.fit_on_whole_dataset.with_medians.gm.npz')
# ged_mat = gmfile['ged_mat']
# dis_mat = ged_mat[0:len(Gn), 0:len(Gn)]
# # choose k closest graphs
# time0 = time.time()
# sod_ks_min, group_min = get_closest_k_graphs(dis_mat, k, parallel)
# time_spent = time.time() - time0
# print('closest graphs:', sod_ks_min, group_min)
# print('time spent:', time_spent)
# group_min = (12, 13, 22, 29) # closest w.r.t path kernel
# group_min = (77, 85, 160, 171) # closest w.r.t ged
# group_min = (0,1,2,3,4,5,6,7,8,9,10,11) # closest w.r.t treelet kernel
Gn_median = [Gn[g].copy() for g in group_min]
if Kmatrix is not None:
Kmatrix_median = np.copy(Kmatrix[group_min,:])
Kmatrix_median = Kmatrix_median[:,group_min]
else:
Kmatrix_median = None

# 1. fit edit cost constants.
time0 = time.time()
edit_cost_constants = fit_edit_cost_constants(fit_method, edit_cost_name,
edit_cost_constants=edit_cost_constants, initial_solutions=initial_solutions,
Gn_median=Gn_median, node_label=node_label, edge_label=edge_label,
gkernel=gkernel, dataset=dataset, init_ecc=init_ecc,
Gn=Gn, Kmatrix_median=Kmatrix_median)
time_fitting = time.time() - time0
# 2. compute set median and gen median using IAM (C++ through bash).
print('\nstart computing set median and gen median using IAM (C++ through bash)...\n')
group_fnames = [Gn[g].graph['filename'] for g in group_min]
time0 = time.time()
sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constants,
cost=edit_cost_name, initial_solutions=initial_solutions,
graph_dir=graph_dir, dataset=dataset)
time_generating = time.time() - time0
print('\nmedians computed.\n')
# 3. compute distances to the true median.
print('\nstart computing distances to true median....\n')
Gn_median = [Gn[g].copy() for g in group_min]
dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min = \
compute_distances_to_true_median(Gn_median, fname_sm, fname_gm,
gkernel, edit_cost_name,
Kmatrix_median=Kmatrix_median)
idx_dis_k_gi_min = group_min[idx_dis_k_gi_min]
print('index min dis_k_gi:', idx_dis_k_gi_min)
print('sod_sm:', sod_sm)
print('sod_gm:', sod_gm)
# collect return values.
return (sod_sm, sod_gm), \
(dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min), \
(time_fitting, time_generating)
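
# median_on_k_closest_graphs thus returns three tuples: the SODs in graph
# space (sod_sm, sod_gm), the distances in kernel space (dis_k_sm, dis_k_gm,
# dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min) and the timings (time_fitting,
# time_generating); the callers below unpack it accordingly.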


def reform_attributes(G, na_names=[], ea_names=[]):
# collect the named attributes into a single 'attributes' vector per item.
# (G.node / G.edge were removed in NetworkX 2.x; use G.nodes / G.edges.)
if na_names:
for node in G.nodes:
G.nodes[node]['attributes'] = [G.nodes[node][a_name] for a_name in na_names]
if ea_names:
for edge in G.edges:
G.edges[edge]['attributes'] = [G.edges[edge][a_name] for a_name in ea_names]


def get_closest_k_graphs(dis_mat, k, parallel):
k_graph_groups = combinations(range(0, len(dis_mat)), k)
sod_ks_min = np.inf
if parallel:
len_combination = get_combination_length(len(dis_mat), k)
len_itr_max = int(len_combination if len_combination < 1e7 else 1e7)
# pos_cur = 0
graph_groups_slices = split_iterable(k_graph_groups, len_itr_max, len_combination)
for graph_groups_cur in graph_groups_slices:
# while True:
# graph_groups_cur = islice(k_graph_groups, pos_cur, pos_cur + len_itr_max)
graph_groups_cur_list = list(graph_groups_cur)
print('current position:', graph_groups_cur_list[0])
len_itr_cur = len(graph_groups_cur_list)
# if len_itr_cur < len_itr_max:
# break

itr = zip(graph_groups_cur_list, range(0, len_itr_cur))
sod_k_list = np.empty(len_itr_cur)
graphs_list = [None] * len_itr_cur
n_jobs = multiprocessing.cpu_count()
chunksize = int(len_itr_max / n_jobs + 1)
def init_worker(dis_mat_toshare):
global G_dis_mat
G_dis_mat = dis_mat_toshare
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(dis_mat,))
# iterator = tqdm(pool.imap_unordered(_get_closest_k_graphs_parallel,
# itr, chunksize),
# desc='Choosing k closest graphs', file=sys.stdout)
iterator = pool.imap_unordered(_get_closest_k_graphs_parallel, itr, chunksize)
for graphs, i, sod_ks in iterator:
sod_k_list[i] = sod_ks
graphs_list[i] = graphs
pool.close()
pool.join()
arg_min = np.argmin(sod_k_list)
sod_ks_cur = sod_k_list[arg_min]
group_cur = graphs_list[arg_min]
if sod_ks_cur < sod_ks_min:
sod_ks_min = sod_ks_cur
group_min = group_cur
print('get closer graphs:', sod_ks_min, group_min)
else:
for items in tqdm(k_graph_groups, desc='Choosing k closest graphs', file=sys.stdout):
# if items[0] != itmp:
# itmp = items[0]
# print(items)
k_graph_pairs = combinations(items, 2)
sod_ks = 0
for i1, i2 in k_graph_pairs:
sod_ks += dis_mat[i1, i2]
if sod_ks < sod_ks_min:
sod_ks_min = sod_ks
group_min = items
print('get closer graphs:', sod_ks_min, group_min)
return sod_ks_min, group_min


def _get_closest_k_graphs_parallel(itr):
k_graph_pairs = combinations(itr[0], 2)
sod_ks = 0
for i1, i2 in k_graph_pairs:
sod_ks += G_dis_mat[i1, i2]

return itr[0], itr[1], sod_ks
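
# The initializer/global pattern above shares dis_mat with the worker
# processes once at pool start-up rather than pickling it into every task;
# each worker then reads it through the module-level name G_dis_mat.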

def split_iterable(iterable, n, len_iter):
it = iter(iterable)
for i in range(0, len_iter, n):
piece = islice(it, n)
yield piece


def get_combination_length(n, k):
len_combination = 1
for i in range(n, n - k, -1):
len_combination *= i
return int(len_combination / math.factorial(k))
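
# get_combination_length computes the binomial coefficient C(n, k); for
# example, get_combination_length(10, 4) == 10*9*8*7 / 4! == 210.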


###############################################################################

def test_k_closest_graphs():
ds = {'name': 'monoterpenoides',
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
# gkernel = 'untilhpathkernel'
# gkernel = 'weisfeilerlehmankernel'
gkernel = 'treeletkernel'
node_label = 'atom'
edge_label = 'bond_type'
k = 5
edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
# sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
# = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
# 'precomputed', edit_costs=edit_costs,
## 'k-graphs',
# parallel=False)
#
# sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
# = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
# 'expert', parallel=False)
# median_on_k_closest_graphs returns three tuples (SODs, kernel distances,
# timings); unpack accordingly.
(sod_sm, sod_gm), (dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min), _ \
= median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
'expert', parallel=False)
return


def test_k_closest_graphs_with_cv():
gkernel = 'untilhpathkernel'
node_label = 'atom'
edge_label = 'bond_type'
k = 4
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
sod_sm_list = []
sod_gm_list = []
dis_k_sm_list = []
dis_k_gm_list = []
dis_k_gi_min_list = []
for y in y_all:
print('\n-------------------------------------------------------')
print('class of y:', y)
sod_sm_list.append([])
sod_gm_list.append([])
dis_k_sm_list.append([])
dis_k_gm_list.append([])
dis_k_gi_min_list.append([])
for repeat in range(repeats):
print('\nrepeat ', repeat)
collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
(sod_sm, sod_gm), (dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, _), _ \
= median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel,
k, 'whole-dataset', graph_dir=graph_dir,
parallel=False)
sod_sm_list[-1].append(sod_sm)
sod_gm_list[-1].append(sod_gm)
dis_k_sm_list[-1].append(dis_k_sm)
dis_k_gm_list[-1].append(dis_k_gm)
dis_k_gi_min_list[-1].append(dis_k_gi_min)
print('\nsods of the set median for this class:', sod_sm_list[-1])
print('\nsods of the gen median for this class:', sod_gm_list[-1])
print('\ndistances in kernel space of set median for this class:',
dis_k_sm_list[-1])
print('\ndistances in kernel space of gen median for this class:',
dis_k_gm_list[-1])
print('\ndistances in kernel space of min graph for this class:',
dis_k_gi_min_list[-1])
sod_sm_list[-1] = np.mean(sod_sm_list[-1])
sod_gm_list[-1] = np.mean(sod_gm_list[-1])
dis_k_sm_list[-1] = np.mean(dis_k_sm_list[-1])
dis_k_gm_list[-1] = np.mean(dis_k_gm_list[-1])
dis_k_gi_min_list[-1] = np.mean(dis_k_gi_min_list[-1])
print()
print('\nmean sods of the set median for each class:', sod_sm_list)
print('\nmean sods of the gen median for each class:', sod_gm_list)
print('\nmean distance in kernel space of set median for each class:',
dis_k_sm_list)
print('\nmean distances in kernel space of gen median for each class:',
dis_k_gm_list)
print('\nmean distances in kernel space of min graph for each class:',
dis_k_gi_min_list)
print('\nmean sods of the set median of all:', np.mean(sod_sm_list))
print('\nmean sods of the gen median of all:', np.mean(sod_gm_list))
print('\nmean distances in kernel space of set median of all:',
np.mean(dis_k_sm_list))
print('\nmean distances in kernel space of gen median of all:',
np.mean(dis_k_gm_list))
print('\nmean distances in kernel space of min graph of all:',
np.mean(dis_k_gi_min_list))
return

if __name__ == '__main__':
test_k_closest_graphs()
# test_k_closest_graphs_with_cv()

+ 0
- 69
gklearn/preimage/test_median_preimage_generator.py View File

@@ -1,69 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 27 17:30:55 2020

@author: ljia
"""
import multiprocessing
import functools
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from gklearn.preimage import MedianPreimageGenerator
from gklearn.utils import Dataset


def test_median_preimage_generator():
# 1. set parameters.
print('1. setting parameters...')
ds_name = 'Letter-high'
mpg = MedianPreimageGenerator()
mpg_options = {'fit_method': 'k-graphs',
'init_ecc': [3, 3, 1, 3, 3],
'ds_name': 'Letter-high',
'parallel': True,
'time_limit_in_sec': 0,
'max_itrs': 100,
'max_itrs_without_update': 3,
'epsilon_ratio': 0.01,
'verbose': 2}
mpg.set_options(**mpg_options)
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
mpg.kernel_options = {'name': 'structuralspkernel',
'edge_weight': None,
'node_kernels': sub_kernels,
'edge_kernels': sub_kernels,
'compute_method': 'naive',
'parallel': 'imap_unordered',
# 'parallel': None,
'n_jobs': multiprocessing.cpu_count(),
'normalize': True,
'verbose': 2}
mpg.ged_options = {'method': 'IPFP',
'initial_solutions': 40,
'edit_cost': 'LETTER2',
'attr_distance': 'euclidean',
'ratio_runs_from_initial_solutions': 1,
'threads': multiprocessing.cpu_count(),
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
mpg.mge_options = {'init_type': 'MEDOID',
'random_inits': 10,
'time_limit': 600,
'verbose': 2,
'refine': False}
# 2. get dataset.
print('2. getting dataset...')
mpg.dataset = Dataset()
mpg.dataset.load_predefined_dataset(ds_name)
mpg.dataset.cut_graphs(range(0, 10))
# 3. compute median preimage.
print('3. computing median preimage...')
mpg.run()


if __name__ == '__main__':
test_median_preimage_generator()

+ 0
- 686
gklearn/preimage/test_others.py View File

@@ -1,686 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 4 12:20:16 2019

@author: ljia
"""
import sys  # used for tqdm's file=sys.stdout
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
from tqdm import tqdm

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.median import draw_Letter_graph
from gklearn.preimage.ged import GED, ged_median
from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \
dis_gstar, remove_edges


# --------------------------- These are tests --------------------------------#
def test_who_is_the_closest_in_kernel_space(Gn):
idx_gi = [0, 6]
g1 = Gn[idx_gi[0]]
g2 = Gn[idx_gi[1]]
# create the "median" graph.
gnew = g2.copy()
gnew.remove_node(0)
nx.draw_networkx(gnew)
plt.show()
print(gnew.nodes(data=True))
Gn = [gnew] + Gn
# compute gram matrix
Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True)
# the distance matrix
dmatrix = gram2distances(Kmatrix)
print(np.sort(dmatrix[idx_gi[0] + 1]))
print(np.argsort(dmatrix[idx_gi[0] + 1]))
print(np.sort(dmatrix[idx_gi[1] + 1]))
print(np.argsort(dmatrix[idx_gi[1] + 1]))
# for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2
dis_median = [(dmatrix[i, idx_gi[0] + 1] + dmatrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
print(np.sort(dis_median))
print(np.argsort(dis_median))
return


def test_who_is_the_closest_in_GED_space(Gn):
idx_gi = [0, 6]
g1 = Gn[idx_gi[0]]
g2 = Gn[idx_gi[1]]
# create the "median" graph.
gnew = g2.copy()
gnew.remove_node(0)
nx.draw_networkx(gnew)
plt.show()
print(gnew.nodes(data=True))
Gn = [gnew] + Gn
# compute GEDs
ged_matrix = np.zeros((len(Gn), len(Gn)))
for i1 in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
for i2 in range(len(Gn)):
dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib')
ged_matrix[i1, i2] = dis
print(np.sort(ged_matrix[idx_gi[0] + 1]))
print(np.argsort(ged_matrix[idx_gi[0] + 1]))
print(np.sort(ged_matrix[idx_gi[1] + 1]))
print(np.argsort(ged_matrix[idx_gi[1] + 1]))
# for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2
dis_median = [(ged_matrix[i, idx_gi[0] + 1] + ged_matrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
print(np.sort(dis_median))
print(np.argsort(dis_median))
return


def test_will_IAM_give_the_median_graph_we_wanted(Gn):
idx_gi = [0, 6]
g1 = Gn[idx_gi[0]].copy()
g2 = Gn[idx_gi[1]].copy()
# del Gn[idx_gi[0]]
# del Gn[idx_gi[1] - 1]
g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
# g_median = test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1)
nx.draw_networkx(g_median)
plt.show()
print(g_median.nodes(data=True))
print(g_median.edges(data=True))


def test_new_IAM_allGraph_deleteNodes(Gn):
idx_gi = [0, 6]
# g1 = Gn[idx_gi[0]].copy()
# g2 = Gn[idx_gi[1]].copy()

# g1 = nx.Graph(name='haha')
# g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
# g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
# g2 = nx.Graph(name='hahaha')
# g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
# (3, {'atom': 'O'}), (4, {'atom': 'C'})])
# g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
# (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
g1 = nx.Graph(name='haha')
g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
(3, {'atom': 'S'}), (4, {'atom': 'S'})])
g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
(2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
g2 = nx.Graph(name='hahaha')
g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
(3, {'atom': 'O'}), (4, {'atom': 'O'})])
g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
(2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])

# g2 = g1.copy()
# g2.add_nodes_from([(3, {'atom': 'O'})])
# g2.add_nodes_from([(4, {'atom': 'C'})])
# g2.add_edges_from([(1, 3, {'bond_type': '1'})])
# g2.add_edges_from([(3, 4, {'bond_type': '1'})])

# del Gn[idx_gi[0]]
# del Gn[idx_gi[1] - 1]
nx.draw_networkx(g1)
plt.show()
print(g1.nodes(data=True))
print(g1.edges(data=True))
nx.draw_networkx(g2)
plt.show()
print(g2.nodes(data=True))
print(g2.edges(data=True))
g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
# g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(Gn, Gn, c_ei=1, c_er=1, c_es=1)
nx.draw_networkx(g_median)
plt.show()
print(g_median.nodes(data=True))
print(g_median.edges(data=True))


def test_the_simple_two(Gn, gkernel):
from gk_iam import gk_iam_nearest_multi
lmbda = 0.03 # termination probability
r_max = 10 # recursions
l = 500
alpha_range = np.linspace(0.5, 0.5, 1)
k = 2 # k nearest neighbors
# randomly select two molecules
np.random.seed(1)
idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
g1 = Gn[idx_gi[0]]
g2 = Gn[idx_gi[1]]
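# append copies of the two target graphs so the mixed Gram matrix also holds
# the kernel values between every candidate and g1/g2; their columns are
# addressed below as range(len(Gn), len(Gn) + 2).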
Gn_mix = [g.copy() for g in Gn]
Gn_mix.append(g1.copy())
Gn_mix.append(g2.copy())
# g_tmp = iam([g1, g2])
# nx.draw_networkx(g_tmp)
# plt.show()
# compute
# k_list = [] # kernel between each graph and itself.
# k_g1_list = [] # kernel between each graph and g1
# k_g2_list = [] # kernel between each graph and g2
# for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout):
# ktemp = compute_kernel([g, g1, g2], 'marginalizedkernel', False)
# k_list.append(ktemp[0][0, 0])
# k_g1_list.append(ktemp[0][0, 1])
# k_g2_list.append(ktemp[0][0, 2])
km = compute_kernel(Gn_mix, gkernel, True)
# k_list = np.diag(km) # kernel between each graph and itself.
# k_g1_list = km[idx_gi[0]] # kernel between each graph and g1
# k_g2_list = km[idx_gi[1]] # kernel between each graph and g2

g_best = []
dis_best = []
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
range(len(Gn), len(Gn) + 2), km,
k, r_max, gkernel)
dis_best.append(dhat)
g_best.append(ghat_list)
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-images are')
for g in g_best[idx]:
nx.draw_networkx(g)
plt.show()
print(g.nodes(data=True))
print(g.edges(data=True))


def test_remove_bests(Gn, gkernel):
from gk_iam import gk_iam_nearest_multi
lmbda = 0.03 # termination probability
r_max = 10 # recursions
l = 500
alpha_range = np.linspace(0.5, 0.5, 1)
k = 20 # k nearest neighbors
# randomly select two molecules
np.random.seed(1)
idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
g1 = Gn[idx_gi[0]]
g2 = Gn[idx_gi[1]]
# remove the best 2 graphs.
del Gn[idx_gi[0]]
del Gn[idx_gi[1] - 1]
# del Gn[8]
Gn_mix = [g.copy() for g in Gn]
Gn_mix.append(g1.copy())
Gn_mix.append(g2.copy())

# compute
km = compute_kernel(Gn_mix, gkernel, True)
g_best = []
dis_best = []
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
range(len(Gn), len(Gn) + 2), km,
k, r_max, gkernel)
dis_best.append(dhat)
g_best.append(ghat_list)
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-images are')
for g in g_best[idx]:
draw_Letter_graph(g)
# nx.draw_networkx(g)
# plt.show()
print(g.nodes(data=True))
print(g.edges(data=True))


###############################################################################
# Tests on dataset Letter-H.
def test_gkiam_letter_h():
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
# 'extra_params': {}} # node nsymb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
gkernel = 'structuralspkernel'
lmbda = 0.03 # termination probability
r_max = 3 # recursions
# alpha_range = np.linspace(0.5, 0.5, 1)
k = 10 # k nearest neighbors
# classify graphs according to letters.
idx_dict = get_same_item_indices(y_all)
time_list = []
sod_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
for letter in idx_dict:
print('\n-------------------------------------------------------\n')
Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
Gn_mix = Gn_let + [g.copy() for g in Gn_let]
alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
# compute
time0 = time.time()
km = compute_kernel(Gn_mix, gkernel, True)
g_best = []
dis_best = []
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let,
Gn_let, [alpha] * len(Gn_let), range(len(Gn_let), len(Gn_mix)),
km, k, r_max, gkernel, c_ei=1.7, c_er=1.7, c_es=1.7,
ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
dis_best.append(dhat)
g_best.append(ghat_list)
time_list.append(time.time() - time0)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-images are')
for g in g_best[idx]:
draw_Letter_graph(g, savepath='results/gk_iam/')
# nx.draw_networkx(g)
# plt.show()
print(g.nodes(data=True))
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER',
ged_method='IPFP', saveGXL='gedlib-letter')
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
sod_ks_min_list.append(sod_ks)
nb_updated_list.append(nb_updated)
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
print('\nnumber of updates for each letter: ', nb_updated_list)
print('\ntimes:', time_list)
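
# The grouping helper used above, get_same_item_indices, maps each target
# value to the indices of all graphs carrying that value (idx_dict[letter]).
# A minimal sketch of the assumed behaviour, for reference only; this is not
# the library's implementation:
def get_same_item_indices_sketch(y_all):
    idx_dict = {}
    for idx, y in enumerate(y_all):
        idx_dict.setdefault(y, []).append(idx)
    return idx_dict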

#def compute_letter_median_by_average(Gn):
# return g_median

def test_iam_letter_h():
from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
# 'extra_params': {}} # node nsymb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
lmbda = 0.03 # termination probability
# alpha_range = np.linspace(0.5, 0.5, 1)
# classify graphs according to letters.
idx_dict = get_same_item_indices(y_all)
time_list = []
sod_list = []
sod_min_list = []
for letter in idx_dict:
Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
# compute
g_best = []
dis_best = []
time0 = time.time()
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7,
ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
dis_best.append(dhat)
g_best.append(ghat_list)
time_list.append(time.time() - time0)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-images are')
for g in g_best[idx]:
draw_Letter_graph(g, savepath='results/iam/')
# nx.draw_networkx(g)
# plt.show()
print(g.nodes(data=True))
print(g.edges(data=True))
# compute the corresponding sod in kernel space. (alpha range not considered.)
gkernel = 'structuralspkernel'
sod_tmp = []
Gn_mix = g_best[0] + Gn_let
km = compute_kernel(Gn_mix, gkernel, True)
for ig, g in tqdm(enumerate(g_best[0]), desc='computing kernel sod', file=sys.stdout):
dtemp = dis_gstar(ig, range(len(g_best[0]), len(Gn_mix)),
[alpha_range[0]] * len(Gn_let), km, withterm3=False)
sod_tmp.append(dtemp)
sod_list.append(sod_tmp)
sod_min_list.append(np.min(sod_tmp))
print('\nsods in kernel space: ', sod_list)
print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
print('\ntimes:', time_list)
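

# A minimal sketch (an assumption, not the module's implementation) of the
# quantity dis_gstar computes above: the kernel-space distance between graph
# number ig and the weighted mean of the graphs indexed by idx_median,
# d(g, g_bar)^2 = k(g, g) - 2 * sum_i alpha_i * k(g, g_i)
#                 + sum_{i,j} alpha_i * alpha_j * k(g_i, g_j).
# withterm3=False drops the constant third term, which shifts all squared
# distances by the same amount and therefore preserves their ranking.
def dis_gstar_sketch(ig, idx_median, alphas, km, withterm3=True):
    term1 = km[ig, ig]
    term2 = 2 * sum(a * km[ig, j] for a, j in zip(alphas, idx_median))
    term3 = sum(ai * aj * km[i, j]
                for ai, i in zip(alphas, idx_median)
                for aj, j in zip(alphas, idx_median)) if withterm3 else 0
    return np.sqrt(max(term1 - term2 + term3, 0))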


def test_random_preimage_letter_h():
from preimage_random import preimage_random
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
# 'extra_params': {}} # node nsymb
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
# 'extra_params': {}}
# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'extra_params': {}} # node symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
gkernel = 'structuralspkernel'
# lmbda = 0.03 # termination probability
r_max = 3 # 10 # recursions
l = 500
# alpha_range = np.linspace(0.5, 0.5, 1)
#alpha_range = np.linspace(0.1, 0.9, 9)
k = 10 # 5 # k nearest neighbors
# classify graphs according to letters.
idx_dict = get_same_item_indices(y_all)
time_list = []
sod_list = []
sod_min_list = []
for letter in idx_dict:
print('\n-------------------------------------------------------\n')
Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
Gn_mix = Gn_let + [g.copy() for g in Gn_let]
alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
# compute
time0 = time.time()
km = compute_kernel(Gn_mix, gkernel, True)
g_best = []
dis_best = []
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
dhat, ghat_list = preimage_random(Gn_let, Gn_let, [alpha] * len(Gn_let),
range(len(Gn_let), len(Gn_mix)), km,
k, r_max, gkernel, c_ei=1.7,
c_er=1.7, c_es=1.7)
dis_best.append(dhat)
g_best.append(ghat_list)
time_list.append(time.time() - time0)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-images are')
for g in g_best[idx]:
draw_Letter_graph(g, savepath='results/gk_iam/')
# nx.draw_networkx(g)
# plt.show()
print(g.nodes(data=True))
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = ged_median(g_best[0], Gn_let)
sod_list.append(sod_tmp)
sod_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_list)
print('\nsmallest sod in graph space for each letter: ', sod_min_list)
print('\ntimes:', time_list)

def test_gkiam_mutag():
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
# 'extra_params': {}} # node nsymb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
gkernel = 'structuralspkernel'
lmbda = 0.03 # termination probability
r_max = 3 # recursions
# alpha_range = np.linspace(0.5, 0.5, 1)
k = 20 # k nearest neighbors
# classify graphs according to letters.
idx_dict = get_same_item_indices(y_all)
time_list = []
sod_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
for letter in idx_dict:
print('\n-------------------------------------------------------\n')
Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
Gn_mix = Gn_let + [g.copy() for g in Gn_let]
alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
# compute
time0 = time.time()
km = compute_kernel(Gn_mix, gkernel, True)
g_best = []
dis_best = []
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let),
range(len(Gn_let), len(Gn_mix)), km,
k, r_max, gkernel, c_ei=1.7,
c_er=1.7, c_es=1.7)
dis_best.append(dhat)
g_best.append(ghat_list)
time_list.append(time.time() - time0)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-images are')
for g in g_best[idx]:
draw_Letter_graph(g, savepath='results/gk_iam/')
# nx.draw_networkx(g)
# plt.show()
print(g.nodes(data=True))
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = ged_median(g_best[0], Gn_let)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
sod_ks_min_list.append(sod_ks)
nb_updated_list.append(nb_updated)
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
print('\nnumber of updates for each letter: ', nb_updated_list)
print('\ntimes:', time_list)


###############################################################################
# Re-test.
def retest_the_simple_two():
from gk_iam import gk_iam_nearest_multi
# The two simple graphs.
# g1 = nx.Graph(name='haha')
# g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
# g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
# g2 = nx.Graph(name='hahaha')
# g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
# (3, {'atom': 'O'}), (4, {'atom': 'C'})])
# g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
# (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
g1 = nx.Graph(name='haha')
g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
(3, {'atom': 'S'}), (4, {'atom': 'S'})])
g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
(2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
g2 = nx.Graph(name='hahaha')
g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
(3, {'atom': 'O'}), (4, {'atom': 'O'})])
g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
(2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
# # randomly select two molecules
# np.random.seed(1)
# idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
# g1 = Gn[idx_gi[0]]
# g2 = Gn[idx_gi[1]]
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
Gn = [g1.copy(), g2.copy()]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 10 # recursions
# l = 500
alpha_range = np.linspace(0.5, 0.5, 1)
k = 2 # k nearest neighbors
epsilon = 1e-6
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
c_ei=1
c_er=1
c_es=1
Gn_mix = Gn + [g1.copy(), g2.copy()]
# compute
time0 = time.time()
km = compute_kernel(Gn_mix, gkernel, True)
time_km = time.time() - time0

time_list = []
sod_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon,
ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
sod_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', sod_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
print(g_best[idx][0].nodes(data=True))
print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest sod in kernel space for each alpha: ', sod_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)

if __name__ == '__main__':
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
# 'extra_params': {}} # node nsymb
# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
# 'extra_params': {}}
# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'extra_params': {}} # node symb
# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:20]
# import networkx.algorithms.isomorphism as iso
# G1 = nx.MultiDiGraph()
# G2 = nx.MultiDiGraph()
# G1.add_nodes_from([1,2,3], fill='red')
# G2.add_nodes_from([10,20,30,40], fill='red')
# nx.add_path(G1, [1,2,3,4], weight=3, linewidth=2.5)
# nx.add_path(G2, [10,20,30,40], weight=3)
# nm = iso.categorical_node_match('fill', 'red')
# print(nx.is_isomorphic(G1, G2, node_match=nm))
#
# test_new_IAM_allGraph_deleteNodes(Gn)
# test_will_IAM_give_the_median_graph_we_wanted(Gn)
# test_who_is_the_closest_in_GED_space(Gn)
# test_who_is_the_closest_in_kernel_space(Gn)
# test_the_simple_two(Gn, 'untilhpathkernel')
# test_remove_bests(Gn, 'untilhpathkernel')
# test_gkiam_letter_h()
# test_iam_letter_h()
# test_random_preimage_letter_h()
###############################################################################
# retests.
retest_the_simple_two()

+ 0
- 620
gklearn/preimage/test_preimage_iam.py View File

@@ -1,620 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices
from gklearn.preimage.ged import ged_median

from gklearn.preimage.preimage_iam import preimage_iam


###############################################################################
# tests on different values on grid of median-sets and k.

def test_preimage_iam_grid_k_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image.
# alpha_range = np.linspace(0.5, 0.5, 1)
# k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# number of nearest neighbors.
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
nb_updated_k_list = []
g_best = []
for idx_nb, nb_median in enumerate(nb_median_range):
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
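# np.savez appends ".npz" to the file name it is given, hence the extra
# extension when the Gram matrix is loaded back here.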
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
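# the cached Gram matrix only covers Gn; extend it to a square matrix of size
# len(Gn) + nb_median by copying, for each chosen median graph, the kernel
# row/column of its original in Gn, so the kernel values need not be
# recomputed.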
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time_list.append([])
dis_ks_min_list.append([])
sod_gs_list.append([])
sod_gs_min_list.append([])
nb_updated_list.append([])
nb_updated_k_list.append([])
g_best.append([])
for k in k_range:
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
print('k =', k)
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
preimage_iam(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list[idx_nb].append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list[idx_nb].append(dhat)
g_best[idx_nb].append(ghat_list)
print('\nnumber of updates of the best graph by IAM: ', nb_updated)
nb_updated_list[idx_nb].append(nb_updated)
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k)
nb_updated_k_list[idx_nb].append(nb_updated_k)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
'_k' + str(k) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list[idx_nb].append(sod_tmp)
sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs and k: ',
sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs and k: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
nb_updated_list)
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
nb_updated_k_list)
print('\ntimes:', time_list)


###############################################################################
# tests on different numbers of median-sets.

def test_preimage_iam_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 3 # iteration limit for pre-image.
# alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
# parameters for IAM function
# c_vi = 0.037
# c_vr = 0.038
# c_vs = 0.075
# c_ei = 0.001
# c_er = 0.001
# c_es = 0.0
c_vi = 4
c_vr = 4
c_vs = 2
c_ei = 1
c_er = 1
c_es = 1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
# parameters for GED function
# ged_cost='CHEM_1'
ged_cost = 'CONSTANT'
ged_method = 'IPFP'
edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
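# cost constants are ordered as the variable names above indicate: node
# insertion, removal and substitution, then edge insertion, removal and
# substitution.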
ged_stabilizer = 'min'
ged_repeat = 50
params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
'edit_cost_constant': edit_cost_constant,
'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
# number of graphs; we want to compute the median of these graphs.
# nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
nb_median_range = [2]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
nb_updated_k_list = []
g_best = []
for nb_median in nb_median_range:
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
preimage_iam(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged=params_ged)
time_total = time.time() - time0 + time_km
print('\ntime: ', time_total)
time_list.append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
print('\nnumber of updates of the best graph: ', nb_updated)
nb_updated_list.append(nb_updated)
print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
nb_updated_k_list.append(nb_updated_k)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
plt.show()
# plt.savefig('results/preimage_iam/mutag_median_cs.001_nb' + str(nb_median) +
# '.png', format="PNG")
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, params_ged=params_ged)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
nb_updated_list)
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
nb_updated_k_list)
print('\ntimes:', time_list)

###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)

def test_gkiam_2combination_all_pairs():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = False
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
nb_update_mat = np.full((len(Gn), len(Gn)), np.inf)
# test on each pair of graphs.
# for idx1 in range(len(Gn) - 1, -1, -1):
# for idx2 in range(idx1, -1, -1):
for idx1 in range(187, 188):
for idx2 in range(167, 168):
g1 = Gn[idx1].copy()
g2 = Gn[idx2].copy()
# Gn[10] = []
# Gn[10] = []
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
plt.savefig("results/gk_iam/all_pairs/mutag187.png", format="PNG")
plt.show()
plt.clf()
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
plt.savefig("results/gk_iam/all_pairs/mutag167.png", format="PNG")
plt.show()
plt.clf()

###################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
#
# # write Gram matrix to file and read it.
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
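# extend the cached Gram matrix in place with two extra rows/columns that
# duplicate the kernel values of g1 (index idx1) and g2 (index idx2).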
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################
# # use only the two graphs in median set as candidates.
# Gn = [g1.copy(), g2.copy()]
# Gn_mix = Gn + [g1.copy(), g2.copy()]
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
nb_updated_k_list = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, sod_ks, nb_updated, nb_updated_k = \
preimage_iam(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list.append(nb_updated)
nb_updated_k_list.append(nb_updated_k)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/gk_iam/mutag' + str(idx1) + '_' + str(idx2)
+ '_alpha' + str(item) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(g_best[idx][0].nodes(data=True))
# print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = ged_median(g_best[idx], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates of the best graph for each alpha: ',
nb_updated_list)
print('\nnumber of updates of the k nearest graphs for each alpha: ',
nb_updated_k_list)
print('\ntimes:', time_list)
nb_update_mat[idx1, idx2] = nb_updated_list[0]
str_fw = 'graphs %d and %d: %d.\n' % (idx1, idx2, nb_updated_list[0])
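# prepend the new record to the log: read the existing content, rewind to
# the start of the file, then write the new line followed by the old content.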
with open('results/gk_iam/all_pairs/nb_updates.txt', 'r+') as file:
content = file.read()
file.seek(0, 0)
file.write(str_fw + content)

def test_gkiam_2combination():
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
alpha_range = np.linspace(0.5, 0.5, 1)
k = 20 # k nearest neighbors
epsilon = 1e-6
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
c_ei=1
c_er=1
c_es=1
# randomly select two molecules
np.random.seed(1)
idx_gi = [10, 11] # np.random.randint(0, len(Gn), 2)
g1 = Gn[idx_gi[0]].copy()
g2 = Gn[idx_gi[1]].copy()
# Gn[10] = []
# Gn[10] = []
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag10.png", format="PNG")
# plt.show()
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag11.png", format="PNG")
# plt.show()
Gn_mix = [g.copy() for g in Gn]
Gn_mix.append(g1.copy())
Gn_mix.append(g2.copy())
# compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
# write Gram matrix to file and read it.
# np.savez('results/gram_matrix.gm', gm=km, gmtime=time_km)
gmfile = np.load('results/gram_matrix.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon,
ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
print(g_best[idx][0].nodes(data=True))
print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = ged_median(g_best[idx], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)


###############################################################################

if __name__ == '__main__':
###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)
# test_gkiam_2combination()
# test_gkiam_2combination_all_pairs()
###############################################################################
# tests on different numbers of median-sets.
test_preimage_iam_median_nb()
###############################################################################
# tests on different values on grid of median-sets and k.
# test_preimage_iam_grid_k_median_nb()

+ 0
- 539
gklearn/preimage/test_preimage_mix.py View File

@@ -1,539 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.ged import ged_median
from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges
from gklearn.preimage.preimage_iam import preimage_iam_random_mix

###############################################################################
# tests on different values on grid of median-sets and k.

def test_preimage_mix_grid_k_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image.
l_max = 500 # update limit for random generation
# alpha_range = np.linspace(0.5, 0.5, 1)
# k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
InitRandomWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# number of nearest neighbors.
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list_iam = []
nb_updated_list_random = []
nb_updated_k_list_iam = []
nb_updated_k_list_random = []
g_best = []
for idx_nb, nb_median in enumerate(nb_median_range):
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time_list.append([])
dis_ks_min_list.append([])
sod_gs_list.append([])
sod_gs_min_list.append([])
nb_updated_list_iam.append([])
nb_updated_list_random.append([])
nb_updated_k_list_iam.append([])
nb_updated_k_list_random.append([])
g_best.append([])
for k in k_range:
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
print('k =', k)
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
nb_updated_k_iam, nb_updated_k_random = \
preimage_iam_random_mix(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
InitRandomWithAllDk=InitRandomWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list[idx_nb].append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list[idx_nb].append(dhat)
g_best[idx_nb].append(ghat_list)
print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
nb_updated_list_iam[idx_nb].append(nb_updated_iam)
print('\nnumber of updates of the best graph by random generation: ',
nb_updated_random)
nb_updated_list_random[idx_nb].append(nb_updated_random)
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam)
print('\nnumber of updates of k nearest graphs by random generation: ',
nb_updated_k_random)
nb_updated_k_list_random[idx_nb].append(nb_updated_k_random)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
'_k' + str(k) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list[idx_nb].append(sod_tmp)
sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs and k: ',
sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs and k: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
nb_updated_list_iam)
print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ',
nb_updated_list_random)
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
nb_updated_k_list_iam)
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ',
nb_updated_k_list_random)
print('\ntimes:', time_list)


###############################################################################
# tests on different numbers of median-sets.

def test_preimage_mix_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image.
l_max = 500 # update limit for random generation
# alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
InitRandomWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list_iam = []
nb_updated_list_random = []
nb_updated_k_list_iam = []
nb_updated_k_list_random = []
g_best = []
for nb_median in nb_median_range:
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
nb_updated_k_iam, nb_updated_k_random = \
preimage_iam_random_mix(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
InitRandomWithAllDk=InitRandomWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
nb_updated_list_iam.append(nb_updated_iam)
print('\nnumber of updates of the best graph by random generation: ',
nb_updated_random)
nb_updated_list_random.append(nb_updated_random)
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
nb_updated_k_list_iam.append(nb_updated_k_iam)
print('\nnumber of updates of k nearest graphs by random generation: ',
nb_updated_k_random)
nb_updated_k_list_random.append(nb_updated_k_random)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
'.png', format="PNG")
# plt.show()
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
nb_updated_list_iam)
print('\nnumber of updates of the best graph for each set of median graphs by random generation: ',
nb_updated_list_random)
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
nb_updated_k_list_iam)
print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ',
nb_updated_k_list_random)
print('\ntimes:', time_list)

###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)

def test_preimage_mix_2combination_all_pairs():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
l_max = 500 # update limit for random generation
alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
InitRandomWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
# test on each pair of graphs.
# for idx1 in range(len(Gn) - 1, -1, -1):
# for idx2 in range(idx1, -1, -1):
for idx1 in range(187, 188):
for idx2 in range(167, 168):
g1 = Gn[idx1].copy()
g2 = Gn[idx2].copy()
# Gn[10] = []
# Gn[10] = []
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
plt.savefig("results/preimage_mix/mutag187.png", format="PNG")
plt.show()
plt.clf()
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
plt.savefig("results/preimage_mix/mutag167.png", format="PNG")
plt.show()
plt.clf()

###################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
#
# # write Gram matrix to file and read it.
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################
# # use only the two graphs in median set as candidates.
# Gn = [g1.copy(), g2.copy()]
# Gn_mix = Gn + [g1.copy(), g2.copy()]
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list_iam = []
nb_updated_list_random = []
nb_updated_k_list_iam = []
nb_updated_k_list_random = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
nb_updated_k_iam, nb_updated_k_random = \
preimage_iam_random_mix(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
InitRandomWithAllDk=InitRandomWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list_iam.append(nb_updated_iam)
nb_updated_list_random.append(nb_updated_random)
nb_updated_k_list_iam.append(nb_updated_k_iam)
nb_updated_k_list_random.append(nb_updated_k_random)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
+ '_alpha' + str(item) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(g_best[idx][0].nodes(data=True))
# print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = ged_median(g_best[idx], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
print('\nnumber of updates of the best graph for each alpha by random generation: ',
nb_updated_list_random)
print('\nnumber of updates of k nearest graphs for each alpha by IAM: ',
nb_updated_k_list_iam)
print('\nnumber of updates of k nearest graphs for each alpha by random generation: ',
nb_updated_k_list_random)
print('\ntimes:', time_list)
nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
% (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
with open('results/preimage_mix/nb_updates.txt', 'r+') as file:
content = file.read()
file.seek(0, 0)
file.write(str_fw + content)


###############################################################################

if __name__ == '__main__':
###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)
# test_preimage_mix_2combination_all_pairs()
###############################################################################
# tests on different numbers of median-sets.
# test_preimage_mix_median_nb()
###############################################################################
# tests on different values on grid of median-sets and k.
test_preimage_mix_grid_k_median_nb()

+ 0
- 398
gklearn/preimage/test_preimage_random.py View File

@@ -1,398 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.preimage_random import preimage_random
from gklearn.preimage.ged import ged_median
from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges


###############################################################################
# tests on a grid of median-set sizes and numbers k of nearest neighbors.

def test_preimage_random_grid_k_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image.
l = 500 # update limit for random generation
# alpha_range = np.linspace(0.5, 0.5, 1)
# k = 5 # k nearest neighbors
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# number of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# number of nearest neighbors.
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
# select all graphs classified into class 1 (the positive class).
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
g_best = []
for idx_nb, nb_median in enumerate(nb_median_range):
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
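# a new (len(Gn) + nb_median)-square matrix is assembled from the stored Gram
# matrix: the extra rows/columns duplicate the entries of the sampled median
# graphs, so no kernel value has to be recomputed for the median set.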
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
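# uniform weights: every median graph contributes equally to the target
# point in kernel space.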
time_list.append([])
dis_ks_min_list.append([])
sod_gs_list.append([])
sod_gs_min_list.append([])
nb_updated_list.append([])
g_best.append([])
for k in k_range:
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
print('k =', k)
time0 = time.time()
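# preimage_random returns the smallest kernel-space distance found (dhat),
# one corresponding pre-image graph (ghat) and the number of updates.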
dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list[idx_nb].append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list[idx_nb].append(dhat)
g_best[idx_nb].append(ghat)
print('\nnumber of updates of the best graph: ', nb_updated)
nb_updated_list[idx_nb].append(nb_updated)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
with_labels=True)
plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
'_k' + str(k) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list[idx_nb].append(sod_tmp)
sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs and k: ',
sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs and k: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs and k: ',
nb_updated_list)
print('\ntimes:', time_list)



###############################################################################
# tests on different numbers of median-sets.

def test_preimage_random_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image.
l = 500 # update limit for random generation
# alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# number of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# select all graphs classified into class 1 (the positive class).
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
g_best = []
for nb_median in nb_median_range:
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time0 = time.time()
dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list.append(dhat)
g_best.append(ghat)
print('\nnumber of updates of the best graph: ', nb_updated)
nb_updated_list.append(nb_updated)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
with_labels=True)
plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
'.png', format="PNG")
# plt.show()
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs: ',
nb_updated_list)
print('\ntimes:', time_list)

###############################################################################
# test on convex combinations of two randomly chosen graphs (the same
# setting as in the random pre-image paper).
def test_random_preimage_2combination():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:12]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel)
# print(dis_max, dis_min, dis_mean)
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
l = 500
alpha_range = np.linspace(0, 1, 11)
k = 5 # k nearest neighbors
# randomly select two molecules
np.random.seed(1)
idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2)
g1 = Gn[idx_gi[0]].copy()
g2 = Gn[idx_gi[1]].copy()
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag10.png", format="PNG")
# plt.show()
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag11.png", format="PNG")
# plt.show()
######################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
## g_tmp = iam([g1, g2])
## nx.draw_networkx(g_tmp)
## plt.show()
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
###################################################################
idx1 = idx_gi[0]
idx2 = idx_gi[1]
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
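# the loaded matrix already carries two extra rows/columns; they are
# overwritten with the kernel values of g1 and g2, so km behaves as the
# Gram matrix of Gn + [g1, g2].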
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################

time_list = []
nb_updated_list = []
g_best = []
dis_ks_min_list = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha],
range(len(Gn), len(Gn) + 2), km,
k, r_max, l, gkernel)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
with_labels=True)
plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
# plt.show()
plt.clf()
print(g_best[idx].nodes(data=True))
print(g_best[idx].edges(data=True))
# # compute the corresponding sod in graph space. (alpha range not considered.)
# sod_tmp, _ = median_distance(g_best[0], Gn_let)
# sod_gs_list.append(sod_tmp)
# sod_gs_min_list.append(np.min(sod_tmp))
# sod_ks_min_list.append(sod_ks)
# nb_updated_list.append(nb_updated)
# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)
###############################################################################

if __name__ == '__main__':
###############################################################################
# test on convex combinations of two randomly chosen graphs (the same
# setting as in the random pre-image paper).
# test_random_preimage_2combination()
###############################################################################
# tests on different numbers of median-sets.
test_preimage_random_median_nb()
###############################################################################
# tests on a grid of median-set sizes and numbers k of nearest neighbors.
# test_preimage_random_grid_k_median_nb()

+ 0
- 935
gklearn/preimage/xp_fit_method.py View File

@@ -1,935 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 14 15:39:29 2020

@author: ljia
"""
import numpy as np
import random
import csv
from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt
import os
import time

from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel
from gklearn.preimage.find_best_k import getRelations


def get_dataset(ds_name):
if ds_name == 'Letter-high': # node non-symb
dataset = 'cpp_ext/data/collections/Letter.xml'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
for G in Gn:
reform_attributes(G, na_names=['x', 'y'])
G.graph['node_labels'] = []
G.graph['edge_labels'] = []
G.graph['node_attrs'] = ['x', 'y']
G.graph['edge_attrs'] = []
elif ds_name == 'Letter-med': # node non-symb
dataset = 'cpp_ext/data/collections/Letter.xml'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/MED/'
Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
for G in Gn:
reform_attributes(G, na_names=['x', 'y'])
G.graph['node_labels'] = []
G.graph['edge_labels'] = []
G.graph['node_attrs'] = ['x', 'y']
G.graph['edge_attrs'] = []
elif ds_name == 'Letter-low': # node non-symb
dataset = 'cpp_ext/data/collections/Letter.xml'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/LOW/'
Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
for G in Gn:
reform_attributes(G, na_names=['x', 'y'])
G.graph['node_labels'] = []
G.graph['edge_labels'] = []
G.graph['node_attrs'] = ['x', 'y']
G.graph['edge_attrs'] = []
elif ds_name == 'Fingerprint':
# dataset = 'cpp_ext/data/collections/Fingerprint.xml'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
# Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
# for G in Gn:
# reform_attributes(G)
dataset = '../../datasets/Fingerprint/Fingerprint_A.txt'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
Gn, y_all = loadDataset(dataset)
elif ds_name == 'SYNTHETIC':
pass
elif ds_name == 'SYNTHETICnew':
dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew'
# dataset = '../../datasets/Letter-high/Letter-high_A.txt'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
Gn, y_all = loadDataset(dataset)
elif ds_name == 'Synthie':
pass
elif ds_name == 'COIL-DEL':
dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/COIL-DEL/'
Gn, y_all = loadDataset(dataset)
elif ds_name == 'COIL-RAG':
pass
elif ds_name == 'COLORS-3':
pass
elif ds_name == 'FRANKENSTEIN':
pass
# NB: datasets handled by a bare `pass` above are placeholders; selecting
# one of them would leave Gn, y_all and graph_dir undefined.
return Gn, y_all, graph_dir


def init_output_file(ds_name, gkernel, fit_method, dir_output):
# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'edit cost',
'GED method', 'attr distance', 'fit method', 'k',
'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'fitting time', 'generating time', 'total time',
'median set'])
f_detail.close()
# fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'edit cost',
'GED method', 'attr distance', 'fit method', 'k',
'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'fitting time', 'generating time', 'total time',
'# SOD SM -> GM', '# dis_k SM -> GM',
'# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
'repeats better dis_k gi -> GM'])
f_summary.close()
return fn_output_detail, fn_output_summary


def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_solutions=1,
Gn_data=None, k_dis_data=None, Kmatrix=None,
is_separate=False):
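# runs one experiment: fit edit costs with `fit_method`, compute set/gen
# medians for each class and record SODs, kernel distances and run times.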
# 1. set parameters.
print('1. setting parameters...')
ds_name = parameters['ds_name']
gkernel = parameters['gkernel']
edit_cost_name = parameters['edit_cost_name']
ged_method = parameters['ged_method']
attr_distance = parameters['attr_distance']
fit_method = parameters['fit_method']
init_ecc = parameters['init_ecc']

node_label = None
edge_label = None
dir_output = 'results/xp_fit_method/'
# 2. get dataset.
print('2. getting dataset...')
if Gn_data is None:
Gn, y_all, graph_dir = get_dataset(ds_name)
else:
Gn = Gn_data[0]
y_all = Gn_data[1]
graph_dir = Gn_data[2]
# 3. compute kernel distance matrix.
print('3. computing kernel distance matrix...')
if k_dis_data is None:
dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None,
None, Kmatrix=Kmatrix, gkernel=gkernel)
else:
# dis_mat = k_dis_data[0]
# dis_max = k_dis_data[1]
# dis_min = k_dis_data[2]
# dis_mean = k_dis_data[3]
# print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min, dis_mean)
pass


if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = init_output_file(ds_name, gkernel,
fit_method, dir_output)

# start repeats.
repeats = 1
# k_list = range(2, 11)
k_list = [0]
# get indices by classes.
y_idx = get_same_item_indices(y_all)
random.seed(1)
rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
for k in k_list:
# print('\n--------- k =', k, '----------')
sod_sm_mean_list = []
sod_gm_mean_list = []
dis_k_sm_mean_list = []
dis_k_gm_mean_list = []
dis_k_gi_min_mean_list = []
time_fitting_mean_list = []
time_generating_mean_list = []
time_total_mean_list = []
# 4. start generating and computing over targets.
print('4. starting generating and computing over targets...')
for i, (y, values) in enumerate(y_idx.items()):
# y = 'I'
# values = y_idx[y]
# values = values[0:10]
print('\ny =', y)
# if y.strip() == 'A':
# continue
k = len(values)
print('\n--------- k =', k, '----------')
if k < 2:
print('\nk = ', k, ', skip.\n')
continue
sod_sm_list = []
sod_gm_list = []
dis_k_sm_list = []
dis_k_gm_list = []
dis_k_gi_min_list = []
time_fitting_list = []
time_generating_list = []
time_total_list = []
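# each triple counts [improved, equal, worsened] outcomes over the repeats
# (e.g. SOD SM -> GM: did the generalized median beat the set median?);
# the repeats_better_* lists record which repeats improved.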
nb_sod_sm2gm = [0, 0, 0]
nb_dis_k_sm2gm = [0, 0, 0]
nb_dis_k_gi2sm = [0, 0, 0]
nb_dis_k_gi2gm = [0, 0, 0]
repeats_better_sod_sm2gm = []
repeats_better_dis_k_sm2gm = []
repeats_better_dis_k_gi2sm = []
repeats_better_dis_k_gi2gm = []
# get Gram matrix for this part of data.
if Kmatrix is not None:
if is_separate:
Kmatrix_sub = Kmatrix[i].copy()
else:
Kmatrix_sub = Kmatrix[values,:]
Kmatrix_sub = Kmatrix_sub[:,values]
else:
Kmatrix_sub = None
for repeat in range(repeats):
print('\nrepeat =', repeat)
random.seed(rdn_seed_list[repeat])
median_set_idx_idx = random.sample(range(0, len(values)), k)
median_set_idx = [values[idx] for idx in median_set_idx_idx]
print('median set: ', median_set_idx)
Gn_median = [Gn[g] for g in values]
# from notebooks.utils.plot_all_graphs import draw_Fingerprint_graph
# for Gn in Gn_median:
# draw_Fingerprint_graph(Gn, save=None)
# GENERATING & COMPUTING!!
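# median_on_k_closest_graphs returns the SODs of the set and gen medians,
# the kernel distances (SM, GM, per-graph list, minimum, argmin) and the
# fitting / generating times, unpacked below.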
res_sods, res_dis_ks, res_times = median_on_k_closest_graphs(Gn_median,
node_label, edge_label,
gkernel, k, fit_method=fit_method, graph_dir=graph_dir,
edit_cost_constants=None, group_min=median_set_idx_idx,
dataset=ds_name, initial_solutions=initial_solutions,
edit_cost_name=edit_cost_name, init_ecc=init_ecc,
Kmatrix=Kmatrix_sub, parallel=False)
sod_sm = res_sods[0]
sod_gm = res_sods[1]
dis_k_sm = res_dis_ks[0]
dis_k_gm = res_dis_ks[1]
dis_k_gi = res_dis_ks[2]
dis_k_gi_min = res_dis_ks[3]
idx_dis_k_gi_min = res_dis_ks[4]
time_fitting = res_times[0]
time_generating = res_times[1]
# write result detail.
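# getRelations maps the sign (-1/0/1) of each difference to a textual
# relation for the CSV output.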
sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
if save_results:
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow([ds_name, gkernel,
edit_cost_name, ged_method, attr_distance,
fit_method, k, y, repeat,
sod_sm, sod_gm, dis_k_sm, dis_k_gm,
dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
dis_k_gi2gm, time_fitting, time_generating,
time_fitting + time_generating, median_set_idx])
f_detail.close()
# compute result summary.
sod_sm_list.append(sod_sm)
sod_gm_list.append(sod_gm)
dis_k_sm_list.append(dis_k_sm)
dis_k_gm_list.append(dis_k_gm)
dis_k_gi_min_list.append(dis_k_gi_min)
time_fitting_list.append(time_fitting)
time_generating_list.append(time_generating)
time_total_list.append(time_fitting + time_generating)
# # SOD SM -> GM
if sod_sm > sod_gm:
nb_sod_sm2gm[0] += 1
repeats_better_sod_sm2gm.append(repeat)
elif sod_sm == sod_gm:
nb_sod_sm2gm[1] += 1
elif sod_sm < sod_gm:
nb_sod_sm2gm[2] += 1
# # dis_k SM -> GM
if dis_k_sm > dis_k_gm:
nb_dis_k_sm2gm[0] += 1
repeats_better_dis_k_sm2gm.append(repeat)
elif dis_k_sm == dis_k_gm:
nb_dis_k_sm2gm[1] += 1
elif dis_k_sm < dis_k_gm:
nb_dis_k_sm2gm[2] += 1
# # dis_k gi -> SM
if dis_k_gi_min > dis_k_sm:
nb_dis_k_gi2sm[0] += 1
repeats_better_dis_k_gi2sm.append(repeat)
elif dis_k_gi_min == dis_k_sm:
nb_dis_k_gi2sm[1] += 1
elif dis_k_gi_min < dis_k_sm:
nb_dis_k_gi2sm[2] += 1
# # dis_k gi -> GM
if dis_k_gi_min > dis_k_gm:
nb_dis_k_gi2gm[0] += 1
repeats_better_dis_k_gi2gm.append(repeat)
elif dis_k_gi_min == dis_k_gm:
nb_dis_k_gi2gm[1] += 1
elif dis_k_gi_min < dis_k_gm:
nb_dis_k_gi2gm[2] += 1
# save median graphs.
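# the set/gen median GXL files produced by the GED median code are copied
# into dir_output/medians/ under names encoding fit method, k, class and repeat.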
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
# reform_attributes(G_best_kernel)
fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='default')
# plot median graphs.
if ds_name == 'Letter-high' or ds_name == 'Letter-med' or ds_name == 'Letter-low':
set_median = loadGXL(fn_pre_sm_new + '.gxl')
gen_median = loadGXL(fn_pre_gm_new + '.gxl')
draw_Letter_graph(set_median, fn_pre_sm_new)
draw_Letter_graph(gen_median, fn_pre_gm_new)
draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
# write result summary for each class.
sod_sm_mean_list.append(np.mean(sod_sm_list))
sod_gm_mean_list.append(np.mean(sod_gm_list))
dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
time_fitting_mean_list.append(np.mean(time_fitting_list))
time_generating_mean_list.append(np.mean(time_generating_list))
time_total_mean_list.append(np.mean(time_total_list))
sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
if save_results:
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel,
edit_cost_name, ged_method, attr_distance,
fit_method, k, y,
sod_sm_mean_list[-1], sod_gm_mean_list[-1],
dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean,
time_fitting_mean_list[-1], time_generating_mean_list[-1],
time_total_mean_list[-1], nb_sod_sm2gm,
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
f_summary.close()

# write overall result summary.
sod_sm_mean = np.mean(sod_sm_mean_list)
sod_gm_mean = np.mean(sod_gm_mean_list)
dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)
time_fitting_mean = np.mean(time_fitting_mean_list)
time_generating_mean = np.mean(time_generating_mean_list)
time_total_mean = np.mean(time_total_mean_list)
sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
if save_results:
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel,
edit_cost_name, ged_method, attr_distance,
fit_method, k, 'all',
sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean,
time_fitting_mean, time_generating_mean, time_total_mean])
f_summary.close()
print('\ncomplete.')

# draw the current median graph.
def draw_Letter_graph(graph, file_prefix):
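# node positions are taken from each node's 'x'/'y' attributes.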
plt.figure()
pos = {}
for n in graph.nodes:
pos[n] = np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])
nx.draw_networkx(graph, pos)
plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
# plt.show()
plt.clf()
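
# Compute the Gram matrix either per class (is_separate=True) or once for the
# whole dataset; save it to results/xp_fit_method/ together with the run time,
# and return pairwise kernel-distance statistics. Note that `ds_name` is read
# from module scope at call time.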
def compute_gm_for_each_class(Gn, y_all, gkernel, parallel='imap_unordered', is_separate=True):
if is_separate:
print('the Gram matrix is computed for each class.')
y_idx = get_same_item_indices(y_all)
Kmatrix = []
run_time = []
k_dis_data = []
for i, (y, values) in enumerate(y_idx.items()):
print('The ', str(i), ' class:')
Gn_i = [Gn[val] for val in values]
time0 = time.time()
Kmatrix.append(compute_kernel(Gn_i, gkernel, None, None, True, parallel=parallel))
run_time.append(time.time() - time0)
k_dis_data.append(kernel_distance_matrix(Gn_i, None, None,
Kmatrix=Kmatrix[i], gkernel=gkernel, verbose=True))
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
dis_max = np.max([item[1] for item in k_dis_data])
dis_min = np.min([item[2] for item in k_dis_data])
dis_mean = np.mean([item[3] for item in k_dis_data])
print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min,
dis_mean)

else:
time0 = time.time()
Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel=parallel)
run_time = time.time() - time0
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
k_dis_data = kernel_distance_matrix(Gn, None, None,
Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
print('the Gram matrix is computed for the whole dataset.')
print('pair distances - dis_max, dis_min, dis_mean:', k_dis_data[1],
k_dis_data[2], k_dis_data[3])
print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]
return Kmatrix, run_time, k_dis_data

if __name__ == "__main__":
# #### xp 1: Letter-high, spkernel.
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Letter-high'
# gkernel = 'spkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
## Gn = Gn[0:50]
## y_all = y_all[0:50]
# # compute pair distances.
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=None, gkernel=gkernel, verbose=True)
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# # fitting and computing.
# fit_methods = ['random', 'expert', 'k-graphs']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method}
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
# #### xp 2: Letter-high, sspkernel.
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Letter-high'
# gkernel = 'structuralspkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
## Gn = Gn[0:50]
## y_all = y_all[0:50]
# # compute pair distances.
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=None, gkernel=gkernel, verbose=True)
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# # fitting and computing.
# fit_methods = ['random', 'expert', 'k-graphs']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method}
# print('parameters: ', parameters)
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
# #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC.
# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
# # normalization
# Kmatrix_diag = Kmatrix.diagonal().copy()
# for i in range(len(Kmatrix)):
# for j in range(i, len(Kmatrix)):
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
# Kmatrix[j][i] = Kmatrix[i][j]
## np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
## Kmatrix=Kmatrix, run_time=run_time)
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'SYNTHETICnew'
# gkernel = 'structuralspkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
## Gn = Gn[0:10]
## y_all = y_all[0:10]
# for G in Gn:
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# # compute pair distances.
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# # fitting and computing.
# fit_methods = ['k-graphs', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'NON_SYMBOLIC',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method}
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=1,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
# ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC.
# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# # normalization
# Kmatrix_diag = Kmatrix.diagonal().copy()
# for i in range(len(Kmatrix)):
# for j in range(i, len(Kmatrix)):
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
# Kmatrix[j][i] = Kmatrix[i][j]
# run_time = 21821.35
# np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
# Kmatrix=Kmatrix, run_time=run_time)
#
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'SYNTHETICnew'
# gkernel = 'spkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
## # remove graphs without nodes and edges.
## Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_node(G) != 0
## and nx.number_of_edges(G) != 0)]
## idx = [G[0] for G in Gn]
## Gn = [G[1] for G in Gn]
## y_all = [y_all[i] for i in idx]
## Gn = Gn[0:5]
## y_all = y_all[0:5]
# for G in Gn:
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
#
# # compute/read Gram matrix and pair distances.
## Kmatrix = compute_kernel(Gn, gkernel, None, None, True)
## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
## Kmatrix=Kmatrix)
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
## Kmatrix = Kmatrix[[0,1,2,3,4],:]
## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
# print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## Kmatrix = np.zeros((len(Gn), len(Gn)))
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
#
# # fitting and computing.
# fit_methods = ['k-graphs', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'NON_SYMBOLIC',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method}
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=1,
# Gn_data=[Gn, y_all, graph_dir],
# k_dis_data=[dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
# #### xp 5: Fingerprint, sspkernel, using LETTER2, only node attrs.
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Fingerprint'
# gkernel = 'structuralspkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
## and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
# y_idx = get_same_item_indices(y_all)
# # remove unused labels.
# for G in Gn:
# G.graph['edge_attrs'] = []
# for edge in G.edges:
# del G.edges[edge]['attributes']
# del G.edges[edge]['orient']
# del G.edges[edge]['angle']
## Gn = Gn[805:815]
## y_all = y_all[805:815]
# for G in Gn:
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
#
# # compute/read Gram matrix and pair distances.
## Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
## Kmatrix=Kmatrix)
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
# Kmatrix = gmfile['Kmatrix']
## run_time = gmfile['run_time']
## Kmatrix = Kmatrix[[0,1,2,3,4],:]
## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## Kmatrix = np.zeros((len(Gn), len(Gn)))
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
#
# # fitting and computing.
# fit_methods = ['k-graphs', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method,
# 'init_ecc': [1,1,1,1,1]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
# #### xp 6: Letter-med, sspkernel.
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Letter-med'
# gkernel = 'structuralspkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
## Gn = Gn[0:50]
## y_all = y_all[0:50]
#
# # compute/read Gram matrix and pair distances.
# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
# Kmatrix=Kmatrix)
## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
## Kmatrix = gmfile['Kmatrix']
## run_time = gmfile['run_time']
## Kmatrix = Kmatrix[[0,1,2,3,4],:]
## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## Kmatrix = np.zeros((len(Gn), len(Gn)))
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
#
# # fitting and computing.
# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method,
# 'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]}
# print('parameters: ', parameters)
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
# #### xp 7: Letter-low, sspkernel.
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Letter-low'
# gkernel = 'structuralspkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
## Gn = Gn[0:50]
## y_all = y_all[0:50]
#
# # compute/read Gram matrix and pair distances.
# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
# Kmatrix=Kmatrix)
## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
## Kmatrix = gmfile['Kmatrix']
## run_time = gmfile['run_time']
## Kmatrix = Kmatrix[[0,1,2,3,4],:]
## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## Kmatrix = np.zeros((len(Gn), len(Gn)))
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
#
# # fitting and computing.
# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method,
# 'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
# print('parameters: ', parameters)
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
# #### xp 8: Letter-med, spkernel.
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Letter-med'
# gkernel = 'spkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
## Gn = Gn[0:50]
## y_all = y_all[0:50]
#
# # compute/read Gram matrix and pair distances.
# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
# Kmatrix=Kmatrix)
## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
## Kmatrix = gmfile['Kmatrix']
## run_time = gmfile['run_time']
## Kmatrix = Kmatrix[[0,1,2,3,4],:]
## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## Kmatrix = np.zeros((len(Gn), len(Gn)))
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
#
# # fitting and computing.
# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method,
# 'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]}
# print('parameters: ', parameters)
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)

# #### xp 9: Letter-low, spkernel.
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Letter-low'
# gkernel = 'spkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
## Gn = Gn[0:50]
## y_all = y_all[0:50]
#
# # compute/read Gram matrix and pair distances.
# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
# Kmatrix=Kmatrix)
## gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
## Kmatrix = gmfile['Kmatrix']
## run_time = gmfile['run_time']
## Kmatrix = Kmatrix[[0,1,2,3,4],:]
## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
## print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## Kmatrix = np.zeros((len(Gn), len(Gn)))
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
#
# # fitting and computing.
# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method,
# 'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
# print('parameters: ', parameters)
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
#### xp 5: COIL-DEL, sspkernel, using LETTER2, only node attrs.
# load dataset.
print('getting dataset and computing kernel distance matrix first...')
ds_name = 'COIL-DEL'
gkernel = 'structuralspkernel'
Gn, y_all, graph_dir = get_dataset(ds_name)
# remove graphs without nodes and edges.
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
# and nx.number_of_edges(G) != 0)]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
y_all = [y_all[i] for i in idx]
# remove unused labels.
for G in Gn:
G.graph['edge_labels'] = []
for edge in G.edges:
del G.edges[edge]['bond_type']
del G.edges[edge]['valence']
# Gn = Gn[805:815]
# y_all = y_all[805:815]
for G in Gn:
G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# compute/read Gram matrix and pair distances.
is_separate = True
Kmatrix, run_time, k_dis_data = compute_gm_for_each_class(Gn,
y_all,
gkernel,
parallel='imap_unordered',
is_separate=is_separate)
# Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
# Kmatrix=Kmatrix)
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
# Kmatrix = Kmatrix[[0,1,2,3,4],:]
# Kmatrix = Kmatrix[:,[0,1,2,3,4]]
# print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
# Kmatrix = np.zeros((len(Gn), len(Gn)))
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# fitting and computing.
fit_methods = ['k-graphs', 'random', 'random', 'random']
for fit_method in fit_methods:
print('\n-------------------------------------')
print('fit method:', fit_method)
parameters = {'ds_name': ds_name,
'gkernel': gkernel,
'edit_cost_name': 'LETTER2',
'ged_method': 'mIPFP',
'attr_distance': 'euclidean',
'fit_method': fit_method,
'init_ecc': [3,3,1,3,3]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
xp_fit_method_for_non_symbolic(parameters, save_results=True,
initial_solutions=40,
Gn_data=[Gn, y_all, graph_dir],
k_dis_data=k_dis_data,
Kmatrix=Kmatrix,
is_separate=is_separate)

+ 0
- 476
gklearn/preimage/xp_letter_h.py View File

@@ -1,476 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 14 15:39:29 2020

@author: ljia
"""
import numpy as np
import random
import csv
from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt
import os

from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix
from gklearn.preimage.find_best_k import getRelations


def xp_letter_h_LETTER2_cost():
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node non-symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel')
for G in Gn:
reform_attributes(G)
# ds = {'name': 'Letter-high',
# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
# Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
gkernel = 'structuralspkernel'
node_label = None
edge_label = None
ds_name = 'letter-h'
dir_output = 'results/xp_letter_h/'
save_results = True
cost = 'LETTER2'
repeats = 1
# k_list = range(2, 11)
k_list = [150]
fit_method = 'k-graphs'
# get indices by classes.
y_idx = get_same_item_indices(y_all)
if save_results:
# create result files.
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'median set'])
f_detail.close()
fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
'# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
'repeats better dis_k gi -> GM'])
f_summary.close()
random.seed(1)
rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
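# one fixed seed per repeat keeps the sampled median sets reproducible.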
for k in k_list:
print('\n--------- k =', k, '----------')
sod_sm_mean_list = []
sod_gm_mean_list = []
dis_k_sm_mean_list = []
dis_k_gm_mean_list = []
dis_k_gi_min_mean_list = []
# nb_sod_sm2gm = [0, 0, 0]
# nb_dis_k_sm2gm = [0, 0, 0]
# nb_dis_k_gi2sm = [0, 0, 0]
# nb_dis_k_gi2gm = [0, 0, 0]
# repeats_better_sod_sm2gm = []
# repeats_better_dis_k_sm2gm = []
# repeats_better_dis_k_gi2sm = []
# repeats_better_dis_k_gi2gm = []
for i, (y, values) in enumerate(y_idx.items()):
print('\ny =', y)
# y = 'F'
# values = y_idx[y]
# values = values[0:10]
k = len(values)
sod_sm_list = []
sod_gm_list = []
dis_k_sm_list = []
dis_k_gm_list = []
dis_k_gi_min_list = []
nb_sod_sm2gm = [0, 0, 0]
nb_dis_k_sm2gm = [0, 0, 0]
nb_dis_k_gi2sm = [0, 0, 0]
nb_dis_k_gi2gm = [0, 0, 0]
repeats_better_sod_sm2gm = []
repeats_better_dis_k_sm2gm = []
repeats_better_dis_k_gi2sm = []
repeats_better_dis_k_gi2gm = []
for repeat in range(repeats):
print('\nrepeat =', repeat)
random.seed(rdn_seed_list[repeat])
median_set_idx_idx = random.sample(range(0, len(values)), k)
median_set_idx = [values[idx] for idx in median_set_idx_idx]
print('median set: ', median_set_idx)
Gn_median = [Gn[g] for g in values]
sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
= median_on_k_closest_graphs(Gn_median, node_label, edge_label,
gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
edit_costs=None, group_min=median_set_idx_idx,
dataset='Letter', cost=cost, parallel=False)
# write result detail.
sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
if save_results:
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
y, repeat,
sod_sm, sod_gm, dis_k_sm, dis_k_gm,
dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
dis_k_gi2gm, median_set_idx])
f_detail.close()
# compute result summary.
sod_sm_list.append(sod_sm)
sod_gm_list.append(sod_gm)
dis_k_sm_list.append(dis_k_sm)
dis_k_gm_list.append(dis_k_gm)
dis_k_gi_min_list.append(dis_k_gi_min)
# # SOD SM -> GM
if sod_sm > sod_gm:
nb_sod_sm2gm[0] += 1
repeats_better_sod_sm2gm.append(repeat)
elif sod_sm == sod_gm:
nb_sod_sm2gm[1] += 1
elif sod_sm < sod_gm:
nb_sod_sm2gm[2] += 1
# # dis_k SM -> GM
if dis_k_sm > dis_k_gm:
nb_dis_k_sm2gm[0] += 1
repeats_better_dis_k_sm2gm.append(repeat)
elif dis_k_sm == dis_k_gm:
nb_dis_k_sm2gm[1] += 1
elif dis_k_sm < dis_k_gm:
nb_dis_k_sm2gm[2] += 1
# # dis_k gi -> SM
if dis_k_gi_min > dis_k_sm:
nb_dis_k_gi2sm[0] += 1
repeats_better_dis_k_gi2sm.append(repeat)
elif dis_k_gi_min == dis_k_sm:
nb_dis_k_gi2sm[1] += 1
elif dis_k_gi_min < dis_k_sm:
nb_dis_k_gi2sm[2] += 1
# # dis_k gi -> GM
if dis_k_gi_min > dis_k_gm:
nb_dis_k_gi2gm[0] += 1
repeats_better_dis_k_gi2gm.append(repeat)
elif dis_k_gi_min == dis_k_gm:
nb_dis_k_gi2gm[1] += 1
elif dis_k_gi_min < dis_k_gm:
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
reform_attributes(G_best_kernel)
fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
# plot median graphs.
set_median = loadGXL(fn_pre_sm_new + '.gxl')
gen_median = loadGXL(fn_pre_gm_new + '.gxl')
draw_Letter_graph(set_median, fn_pre_sm_new)
draw_Letter_graph(gen_median, fn_pre_gm_new)
draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
# write result summary for each letter.
sod_sm_mean_list.append(np.mean(sod_sm_list))
sod_gm_mean_list.append(np.mean(sod_gm_list))
dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
if save_results:
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
sod_sm_mean_list[-1], sod_gm_mean_list[-1],
dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
f_summary.close()

# write overall result summary.
sod_sm_mean = np.mean(sod_sm_mean_list)
sod_gm_mean = np.mean(sod_gm_mean_list)
dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)
sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
if save_results:
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean])
f_summary.close()
print('\ncomplete.')


def xp_letter_h():
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node non-symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
for G in Gn:
reform_attributes(G)
# ds = {'name': 'Letter-high',
# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
# Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
gkernel = 'structuralspkernel'
node_label = None
edge_label = None
ds_name = 'letter-h'
dir_output = 'results/xp_letter_h/'
save_results = False
repeats = 1
# k_list = range(2, 11)
k_list = [150]
fit_method = 'k-graphs'
# get indices by classes.
y_idx = get_same_item_indices(y_all)
if save_results:
# create result files.
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'median set'])
f_detail.close()
fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
'# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
'repeats better dis_k gi -> GM'])
f_summary.close()
random.seed(1)
rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
for k in k_list:
print('\n--------- k =', k, '----------')
sod_sm_mean_list = []
sod_gm_mean_list = []
dis_k_sm_mean_list = []
dis_k_gm_mean_list = []
dis_k_gi_min_mean_list = []
# nb_sod_sm2gm = [0, 0, 0]
# nb_dis_k_sm2gm = [0, 0, 0]
# nb_dis_k_gi2sm = [0, 0, 0]
# nb_dis_k_gi2gm = [0, 0, 0]
# repeats_better_sod_sm2gm = []
# repeats_better_dis_k_sm2gm = []
# repeats_better_dis_k_gi2sm = []
# repeats_better_dis_k_gi2gm = []
for i, (y, values) in enumerate(y_idx.items()):
print('\ny =', y)
# y = 'N'
# values = y_idx[y]
# values = values[0:10]
k = len(values)
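# k is overridden by the class size, so the median set spans the whole class.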
sod_sm_list = []
sod_gm_list = []
dis_k_sm_list = []
dis_k_gm_list = []
dis_k_gi_min_list = []
nb_sod_sm2gm = [0, 0, 0]
nb_dis_k_sm2gm = [0, 0, 0]
nb_dis_k_gi2sm = [0, 0, 0]
nb_dis_k_gi2gm = [0, 0, 0]
repeats_better_sod_sm2gm = []
repeats_better_dis_k_sm2gm = []
repeats_better_dis_k_gi2sm = []
repeats_better_dis_k_gi2gm = []
for repeat in range(repeats):
print('\nrepeat =', repeat)
random.seed(rdn_seed_list[repeat])
median_set_idx_idx = random.sample(range(0, len(values)), k)
median_set_idx = [values[idx] for idx in median_set_idx_idx]
print('median set: ', median_set_idx)
Gn_median = [Gn[g] for g in values]
sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
= median_on_k_closest_graphs(Gn_median, node_label, edge_label,
gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
edit_costs=None, group_min=median_set_idx_idx,
dataset='Letter', parallel=False)
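# SOD: sum of distances (GED) to the median set; SM/GM: set/generalized median; dis_k: distance in the kernel's feature space; gi: the input graphs themselves.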
# write result detail.
sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
if save_results:
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
y, repeat,
sod_sm, sod_gm, dis_k_sm, dis_k_gm,
dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
dis_k_gi2gm, median_set_idx])
f_detail.close()
# compute result summary.
sod_sm_list.append(sod_sm)
sod_gm_list.append(sod_gm)
dis_k_sm_list.append(dis_k_sm)
dis_k_gm_list.append(dis_k_gm)
dis_k_gi_min_list.append(dis_k_gi_min)
# # SOD SM -> GM
if sod_sm > sod_gm:
nb_sod_sm2gm[0] += 1
repeats_better_sod_sm2gm.append(repeat)
elif sod_sm == sod_gm:
nb_sod_sm2gm[1] += 1
elif sod_sm < sod_gm:
nb_sod_sm2gm[2] += 1
# # dis_k SM -> GM
if dis_k_sm > dis_k_gm:
nb_dis_k_sm2gm[0] += 1
repeats_better_dis_k_sm2gm.append(repeat)
elif dis_k_sm == dis_k_gm:
nb_dis_k_sm2gm[1] += 1
elif dis_k_sm < dis_k_gm:
nb_dis_k_sm2gm[2] += 1
# # dis_k gi -> SM
if dis_k_gi_min > dis_k_sm:
nb_dis_k_gi2sm[0] += 1
repeats_better_dis_k_gi2sm.append(repeat)
elif dis_k_gi_min == dis_k_sm:
nb_dis_k_gi2sm[1] += 1
elif dis_k_gi_min < dis_k_sm:
nb_dis_k_gi2sm[2] += 1
# # dis_k gi -> GM
if dis_k_gi_min > dis_k_gm:
nb_dis_k_gi2gm[0] += 1
repeats_better_dis_k_gi2gm.append(repeat)
elif dis_k_gi_min == dis_k_gm:
nb_dis_k_gi2gm[1] += 1
elif dis_k_gi_min < dis_k_gm:
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
reform_attributes(G_best_kernel)
fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
# plot median graphs.
set_median = loadGXL(fn_pre_sm_new + '.gxl')
gen_median = loadGXL(fn_pre_gm_new + '.gxl')
draw_Letter_graph(set_median, fn_pre_sm_new)
draw_Letter_graph(gen_median, fn_pre_gm_new)
draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
# write result summary for each letter.
sod_sm_mean_list.append(np.mean(sod_sm_list))
sod_gm_mean_list.append(np.mean(sod_gm_list))
dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
if save_results:
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
sod_sm_mean_list[-1], sod_gm_mean_list[-1],
dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
f_summary.close()

# write overall result summary across all letters.
sod_sm_mean = np.mean(sod_sm_mean_list)
sod_gm_mean = np.mean(sod_gm_mean_list)
dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)
sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
if save_results:
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean])
f_summary.close()
print('\ncomplete.')
# Draw the current median graph.
def draw_Letter_graph(graph, file_prefix):
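"""Draw a Letter graph at its (x, y) node coordinates and save the figure as an EPS file."""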
plt.figure()
pos = {}
for n in graph.nodes:
pos[n] = np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])
nx.draw_networkx(graph, pos)
plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
# plt.show()
plt.clf()

if __name__ == "__main__":
# xp_letter_h()
xp_letter_h_LETTER2_cost()

+ 0
- 249
gklearn/preimage/xp_monoterpenoides.py View File

@@ -1,249 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 16 11:03:11 2020

@author: ljia
"""

import numpy as np
import random
import csv
from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt

from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices
from gklearn.preimage.find_best_k import getRelations

def xp_monoterpenoides():
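"""Median-preimage experiment on monoterpenoides: for each class, compute the set median and generalized median with the treelet kernel and write per-class and overall SOD/kernel-distance summaries to CSV."""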
import os

ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# ds = {'name': 'Letter-high',
# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
# Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
gkernel = 'treeletkernel'
node_label = 'atom'
edge_label = 'bond_type'
ds_name = 'monoterpenoides'
dir_output = 'results/xp_monoterpenoides/'
repeats = 1
# k_list = range(2, 11)
k_list = [0]
fit_method = 'k-graphs'
# get indices by classes.
y_idx = get_same_item_indices(y_all)
# create result files.
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'median set'])
f_detail.close()
fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
'# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
'repeats better dis_k gi -> GM'])
f_summary.close()
random.seed(1)
rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
for k in k_list:
print('\n--------- k =', k, '----------')
sod_sm_mean_list = []
sod_gm_mean_list = []
dis_k_sm_mean_list = []
dis_k_gm_mean_list = []
dis_k_gi_min_mean_list = []
# nb_sod_sm2gm = [0, 0, 0]
# nb_dis_k_sm2gm = [0, 0, 0]
# nb_dis_k_gi2sm = [0, 0, 0]
# nb_dis_k_gi2gm = [0, 0, 0]
# repeats_better_sod_sm2gm = []
# repeats_better_dis_k_sm2gm = []
# repeats_better_dis_k_gi2sm = []
# repeats_better_dis_k_gi2gm = []
for i, (y, values) in enumerate(y_idx.items()):
print('\ny =', y)
# y = 'I'
# values = y_idx[y]
k = len(values)
# k = kkk
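# as in the Letter experiment, k is overridden by the class size.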
sod_sm_list = []
sod_gm_list = []
dis_k_sm_list = []
dis_k_gm_list = []
dis_k_gi_min_list = []
nb_sod_sm2gm = [0, 0, 0]
nb_dis_k_sm2gm = [0, 0, 0]
nb_dis_k_gi2sm = [0, 0, 0]
nb_dis_k_gi2gm = [0, 0, 0]
repeats_better_sod_sm2gm = []
repeats_better_dis_k_sm2gm = []
repeats_better_dis_k_gi2sm = []
repeats_better_dis_k_gi2gm = []
for repeat in range(repeats):
print('\nrepeat =', repeat)
random.seed(rdn_seed_list[repeat])
median_set_idx_idx = random.sample(range(0, len(values)), k)
median_set_idx = [values[idx] for idx in median_set_idx_idx]
print('median set: ', median_set_idx)
Gn_median = [Gn[g] for g in values]
sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
= median_on_k_closest_graphs(Gn_median, node_label, edge_label,
gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
edit_costs=None, group_min=median_set_idx_idx,
dataset=ds_name, parallel=False)
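# SOD: sum of GEDs to the median set; dis_k: kernel-space distance (SM/GM: set/generalized median).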
# write result detail.
sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
y, repeat,
sod_sm, sod_gm, dis_k_sm, dis_k_gm,
dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
dis_k_gi2gm, median_set_idx])
f_detail.close()
# compute result summary.
sod_sm_list.append(sod_sm)
sod_gm_list.append(sod_gm)
dis_k_sm_list.append(dis_k_sm)
dis_k_gm_list.append(dis_k_gm)
dis_k_gi_min_list.append(dis_k_gi_min)
# # SOD SM -> GM
if sod_sm > sod_gm:
nb_sod_sm2gm[0] += 1
repeats_better_sod_sm2gm.append(repeat)
elif sod_sm == sod_gm:
nb_sod_sm2gm[1] += 1
elif sod_sm < sod_gm:
nb_sod_sm2gm[2] += 1
# # dis_k SM -> GM
if dis_k_sm > dis_k_gm:
nb_dis_k_sm2gm[0] += 1
repeats_better_dis_k_sm2gm.append(repeat)
elif dis_k_sm == dis_k_gm:
nb_dis_k_sm2gm[1] += 1
elif dis_k_sm < dis_k_gm:
nb_dis_k_sm2gm[2] += 1
# # dis_k gi -> SM
if dis_k_gi_min > dis_k_sm:
nb_dis_k_gi2sm[0] += 1
repeats_better_dis_k_gi2sm.append(repeat)
elif dis_k_gi_min == dis_k_sm:
nb_dis_k_gi2sm[1] += 1
elif dis_k_gi_min < dis_k_sm:
nb_dis_k_gi2sm[2] += 1
# # dis_k gi -> GM
if dis_k_gi_min > dis_k_gm:
nb_dis_k_gi2gm[0] += 1
repeats_better_dis_k_gi2gm.append(repeat)
elif dis_k_gi_min == dis_k_gm:
nb_dis_k_gi2gm[1] += 1
elif dis_k_gi_min < dis_k_gm:
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
# reform_attributes(G_best_kernel)
fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib')
# # plot median graphs.
# set_median = loadGXL(fn_pre_sm_new + '.gxl')
# gen_median = loadGXL(fn_pre_gm_new + '.gxl')
# draw_Letter_graph(set_median, fn_pre_sm_new)
# draw_Letter_graph(gen_median, fn_pre_gm_new)
# draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
# write result summary for each class.
sod_sm_mean_list.append(np.mean(sod_sm_list))
sod_gm_mean_list.append(np.mean(sod_gm_list))
dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
sod_sm_mean_list[-1], sod_gm_mean_list[-1],
dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
f_summary.close()

# write overall result summary across all classes.
sod_sm_mean = np.mean(sod_sm_mean_list)
sod_gm_mean = np.mean(sod_gm_mean_list)
dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)
sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean])
f_summary.close()
print('\ncomplete.')
# Draw the current median graph.
def draw_Letter_graph(graph, file_prefix):
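"""Draw a Letter graph at its (x, y) node coordinates and save the figure as an EPS file."""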
plt.figure()
pos = {}
for n in graph.nodes:
pos[n] = np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])
nx.draw_networkx(graph, pos)
plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
# plt.show()
plt.clf()

if __name__ == "__main__":
xp_monoterpenoides()
