
Update pre-image.

v0.1
jajupmochi, 5 years ago
parent commit 57e13c9c5d
11 changed files with 6214 additions and 1002 deletions
1. notebooks/run_untilhpathkernel.py (+5 −3)
2. notebooks/utils/plot_all_graphs.ipynb (+4191 −491)
3. preimage/gk_iam.py (+340 −38)
4. preimage/iam.py (+493 −368)
5. preimage/median.py (+29 −29)
6. preimage/preimage.py (+152 −17)
7. preimage/run_gk_iam.py (+298 −17)
8. preimage/test.py (+11 −11)
9. preimage/test_random_mutag.py (+599 −0)
10. pygraph/kernels/untilHPathKernel.py (+88 −20)
11. pygraph/utils/graphfiles.py (+8 −8)

notebooks/run_untilhpathkernel.py (+5 −3)

@@ -54,9 +54,11 @@ dslist = [
#    {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = untilhpathkernel
-param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
-'k_func': ['MinMax'], # ['MinMax', 'tanimoto'],
-'compute_method': ['trie']} # ['MinMax']}
+param_grid_precomputed = {'depth': np.linspace(3, 10, 8), # [2],
+'k_func': [None]} # ['MinMax', 'tanimoto'],
+#param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
+#'k_func': ['MinMax'], # ['MinMax', 'tanimoto'],
+#'compute_method': ['trie']} # ['MinMax']}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]
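As an aside, each combination in param_grid_precomputed corresponds to one Gram matrix to precompute. A minimal sketch of that expansion, assuming sklearn's ParameterGrid purely for illustration (the repository's own model-selection helper may enumerate combinations differently):

import numpy as np
from sklearn.model_selection import ParameterGrid

# Illustrative only: each emitted dict is one kernel configuration for which
# model selection would precompute a Gram matrix.
param_grid_precomputed = {'depth': np.linspace(3, 10, 8), 'k_func': [None]}
for params in ParameterGrid(param_grid_precomputed):
    print(params)   # e.g. {'depth': 3.0, 'k_func': None}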




notebooks/utils/plot_all_graphs.ipynb (+4191 −491)
File diff suppressed because it is too large.


preimage/gk_iam.py (+340 −38)

@@ -17,8 +17,11 @@ import multiprocessing
from tqdm import tqdm
import networkx as nx
import matplotlib.pyplot as plt
+import random

-from iam import iam, test_iam_with_more_graphs_as_init, test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
-import matplotlib.pyplot as plt
+from iam import iam, test_iam_with_more_graphs_as_init, iam_moreGraphsAsInit_tryAllPossibleBestGraphs
sys.path.insert(0, "../")
from pygraph.kernels.marginalizedKernel import marginalizedkernel
from pygraph.kernels.untilHPathKernel import untilhpathkernel
@@ -67,7 +70,7 @@ def gk_iam(Gn, alpha):
# Gs_nearest = Gk + gihat_list
# g_tmp = iam(Gs_nearest)
# #
-# # compute distance between phi and the new generated graph.
+# # compute distance between \psi and the new generated graph.
# knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
# p_quit=lmbda, n_iteration=20, remove_totters=False,
# n_jobs=multiprocessing.cpu_count(), verbose=False)
@@ -142,7 +145,7 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
print(g_tmp.nodes(data=True))
print(g_tmp.edges(data=True))
-# compute distance between phi and the new generated graph.
+# compute distance between \psi and the new generated graph.
gi_list = [Gn[i] for i in idx_gi]
knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
@@ -236,7 +239,7 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
# print(g.nodes(data=True))
# print(g.edges(data=True))
# #
-# # compute distance between phi and the new generated graphs.
+# # compute distance between \psi and the new generated graphs.
# gi_list = [Gn[i] for i in idx_gi]
# knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
# dnew_list = []
@@ -278,7 +281,12 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):




def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
-gkernel, c_ei=1, c_er=1, c_es=1, epsilon=0.001):
+gkernel, epsilon=0.001,
+params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
+'ite_max': 50, 'epsilon': 0.001,
+'removeNodes': True, 'connected': False},
+params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
+'saveGXL': 'benoit'}):
"""This function constructs graph pre-image by the iterative pre-image
framework in reference [1], algorithm 1, where the step of generating new
graphs randomly is replaced by the IAM algorithm in reference [2].
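Before the diff continues, a self-contained schematic of the accept/retry loop this function implements (algorithm 1 of [1] with IAM as the generator). Everything here is an illustrative stand-in: plain numbers play the role of graphs, and abs plays the role of the kernel-space distance:

def preimage_loop_sketch(dist_fn, generate_fn, pool, dhat, k=3, r_max=10, epsilon=1e-3):
    r = 0
    while r < r_max:
        candidates = generate_fn(pool)              # stands in for one IAM run
        dnew = min(dist_fn(c) for c in candidates)
        if dnew < dhat and abs(dnew - dhat) >= epsilon:
            dhat = dnew                             # strictly better: accept,
            r = 0                                   # and reset the retry budget
        else:
            r += 1                                  # no real improvement
        pool = sorted(pool + candidates, key=dist_fn)[:k]   # new k nearest
    return dhat, pool

best, pool = preimage_loop_sketch(
    dist_fn=abs,                                    # "distance" of a number to 0
    generate_fn=lambda p: [min(p, key=abs) * 0.9],  # fake generator: shrink best
    pool=[3.0, 4.0, 5.0], dhat=3.0)
print(best)   # shrinks toward 0 until improvements fall below epsilon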
@@ -310,7 +318,7 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
if dis_gs[0] == 0: # the exact pre-image.
print('The exact pre-image is found from the input dataset.')
-return 0, g0hat_list
+return 0, g0hat_list, 0, 0
dhat = dis_gs[0] # the nearest distance
ghat_list = [g.copy() for g in g0hat_list]
# for g in ghat_list:
@@ -320,31 +328,33 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
# print(g.nodes(data=True))
# print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-# for gi in Gk:
-## nx.draw_networkx(gi)
-## plt.show()
+for gi in Gk:
+nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
+# nx.draw_networkx(gi)
+plt.show()
# draw_Letter_graph(g)
-# print(gi.nodes(data=True))
-# print(gi.edges(data=True))
-Gs_nearest = Gk.copy()
+print(gi.nodes(data=True))
+print(gi.edges(data=True))
+Gs_nearest = [g.copy() for g in Gk]
+Gn_nearest_median = [g.copy() for g in Gs_nearest]
# gihat_list = []
# i = 1
r = 0
-itr = 0
-# cur_sod = dhat
-# old_sod = cur_sod * 2
-sod_list = [dhat]
+itr_total = 0
+# cur_dis = dhat
+# old_dis = cur_dis * 2
+dis_list = [dhat]
found = False
nb_updated = 0
-while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_sod - cur_sod) > epsilon:
-print('\nr =', r)
-print('itr for gk =', itr, '\n')
+while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
+print('\nCurrent preimage iteration =', r)
+print('Total preimage iteration =', itr_total, '\n')
found = False
# Gs_nearest = Gk + gihat_list
# g_tmp = iam(Gs_nearest)
-g_tmp_list, _ = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
-Gn_median, Gs_nearest, c_ei=c_ei, c_er=c_er, c_es=c_es)
+g_tmp_list, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs(
+Gn_nearest_median, Gs_nearest, params_ged=params_ged, **params_iam)
# for g in g_tmp_list:
# nx.draw_networkx(g)
# plt.show()
@@ -352,31 +362,73 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
# print(g.nodes(data=True))
# print(g.edges(data=True))
-# compute distance between phi and the new generated graphs.
+# compute distance between \psi and the new generated graphs.
knew = compute_kernel(g_tmp_list + Gn_median, gkernel, False)
dnew_list = []
for idx, g_tmp in enumerate(g_tmp_list):
+# @todo: the term3 below could use the one at the beginning of the function.
dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
-len(g_tmp_list) + len(Gn_median) + 1), alpha, knew,
-withterm3=False))
+len(g_tmp_list) + len(Gn_median) + 1),
+alpha, knew, withterm3=False))
# dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
# knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
# alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
# k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
+# # find the new k nearest graphs.
+# dnew_best = min(dnew_list)
+# dis_gs = dnew_list + dis_gs # add the new nearest distances.
+# Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
+# sort_idx = np.argsort(dis_gs)
+# if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
+# print('We got new k nearest neighbors! Hurray!')
+# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
+## print(dis_gs[-1])
+# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
+# if dnew_best < dhat and np.abs(dnew_best - dhat) > epsilon:
+# print('I have smaller distance!')
+# print(str(dhat) + '->' + str(dis_gs[0]))
+# dhat = dis_gs[0]
+# idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
+# ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
+## for g in ghat_list:
+### nx.draw_networkx(g)
+### plt.show()
+## draw_Letter_graph(g)
+## print(g.nodes(data=True))
+## print(g.edges(data=True))
+# r = 0
+# found = True
+# nb_updated += 1
+# elif np.abs(dnew_best - dhat) < epsilon:
+# print('I have almost equal distance!')
+# print(str(dhat) + '->' + str(dnew_best))
+# else:
+# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
+# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+# Gn_nearest_median = [g.copy() for g in Gs_nearest]
+# if not found:
+# r += 1
# find the new k nearest graphs.
dnew_best = min(dnew_list)
-dis_gs = dnew_list + dis_gs # add the new nearest distances.
-Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
-sort_idx = np.argsort(dis_gs)
+if np.abs(dnew_best - dhat) >= epsilon:
+dis_gs = dnew_list + dis_gs # add the new nearest distances.
+Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
+sort_idx = np.argsort(dis_gs)
+else: # if the new distance is equal to the old one.
+# @todo: works if only one graph is generated.
+Gs_nearest[0] = g_tmp_list[0].copy()
+sort_idx = np.argsort(dis_gs)
if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
-print('We got better k nearest neighbors! Hurray!')
+print('We got new k nearest neighbors! Hurray!')
dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-print(dis_gs[-1])
+# print(dis_gs[-1])
Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
-if dnew_best < dhat and np.abs(dnew_best - dhat) > epsilon:
+if dnew_best < dhat and np.abs(dnew_best - dhat) >= epsilon:
print('I have smaller distance!')
print(str(dhat) + '->' + str(dis_gs[0]))
dhat = dis_gs[0]
@@ -394,19 +446,269 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
elif np.abs(dnew_best - dhat) < epsilon:
print('I have almost equal distance!')
print(str(dhat) + '->' + str(dnew_best))
+else:
+dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
+Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+Gn_nearest_median = [g.copy() for g in Gs_nearest]
if not found:
r += 1
-# old_sod = cur_sod
-# cur_sod = dnew_best
-sod_list.append(dhat)
-itr += 1
+# old_dis = cur_dis
+# cur_dis = dnew_best
+dis_list.append(dhat)
+itr_total += 1
print('\nthe graph is updated', nb_updated, 'times.')
-print('sods in kernel space:', sod_list, '\n')
-return dhat, ghat_list
+print('distances in kernel space:', dis_list, '\n')
+return dhat, ghat_list, dis_list[-1], nb_updated
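To make that bookkeeping concrete, here is a small self-contained helper (illustrative, not part of the commit) that merges n freshly generated candidates into the k-nearest pool the same way dis_gs and Gs_nearest are updated above; merge_k_nearest and the toy values are assumptions:

import numpy as np

def merge_k_nearest(dists_old, items_old, dists_new, items_new, k):
    # prepend the n new candidates, exactly like dis_gs / Gs_nearest above
    dists = list(dists_new) + list(dists_old)
    items = list(items_new) + list(items_old)
    sort_idx = np.argsort(dists)
    # an index < len(dists_new) in the top k means a generated graph survived
    got_new = any(i < len(dists_new) for i in sort_idx[:k])
    return ([dists[i] for i in sort_idx[:k]],
            [items[i] for i in sort_idx[:k]],
            got_new)

d, g, new_in = merge_k_nearest([0.4, 0.7], ['g1', 'g2'], [0.5], ['g_new'], k=2)
print(d, g, new_in)   # [0.4, 0.5] ['g1', 'g_new'] True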



def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
l_max, gkernel, epsilon=0.001,
params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
'ite_max': 50, 'epsilon': 0.001,
'removeNodes': True, 'connected': False},
params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
'saveGXL': 'benoit'}):
"""This function constructs graph pre-image by the iterative pre-image
framework in reference [1], algorithm 1, where new graphs are generated
randomly and by the IAM algorithm in reference [2].
Notes
-----
Each time a set of n better graphs is acquired, their kernel-space distances
are compared with those of the current k nearest graphs, and the k smallest
of the resulting k + n distances are kept as the new k nearest.
"""
Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
# compute k nearest neighbors of phi in DN.
dis_list = [] # distance between g_star and each graph.
term3 = 0
for i1, a1 in enumerate(alpha):
for i2, a2 in enumerate(alpha):
term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
dis_list.append(dtemp)
# sort
sort_idx = np.argsort(dis_list)
dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
if dis_gs[0] == 0: # the exact pre-image.
print('The exact pre-image is found from the input dataset.')
return 0, g0hat_list, 0, 0
dhat = dis_gs[0] # the nearest distance
ghat_list = [g.copy() for g in g0hat_list]
# for g in ghat_list:
# draw_Letter_graph(g)
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
for gi in Gk:
nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
# nx.draw_networkx(gi)
plt.show()
# draw_Letter_graph(g)
print(gi.nodes(data=True))
print(gi.edges(data=True))
Gs_nearest = [g.copy() for g in Gk]
Gn_nearest_median = [g.copy() for g in Gs_nearest]
# gihat_list = []
# i = 1
r = 0
itr_total = 0
# cur_dis = dhat
# old_dis = cur_dis * 2
dis_list = [dhat]
found = False
nb_updated_iam = 0
nb_updated_random = 0
while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
print('Current preimage iteration =', r)
print('Total preimage iteration =', itr_total, '\n')
found = False
# Gs_nearest = Gk + gihat_list
# g_tmp = iam(Gs_nearest)
g_tmp_list, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs(
Gn_nearest_median, Gs_nearest, params_ged=params_ged, **params_iam)
# for g in g_tmp_list:
# nx.draw_networkx(g)
# plt.show()
# draw_Letter_graph(g)
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute distance between \psi and the new generated graphs.
knew = compute_kernel(g_tmp_list + Gn_median, gkernel, False)
dnew_list = []
for idx, g_tmp in enumerate(g_tmp_list):
# @todo: the term3 below could use the one at the beginning of the function.
dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
len(g_tmp_list) + len(Gn_median) + 1),
alpha, knew, withterm3=False))
# find the new k nearest graphs.
# @todo: for now only consider the situation when only one graph is generated by IAM.
dnew_best = min(dnew_list)
gnew_best = g_tmp_list[0].copy()
# when new distance is equal to the old one, use random generation.
if np.abs(dnew_best - dhat) < epsilon or dhat < dnew_best:
# Gs_nearest[0] = g_tmp_list[0].copy()
# sort_idx = np.argsort(dis_gs)
print('Distance almost equal or worse, switching to random generation now.')
print(str(dhat) + '->' + str(dnew_best))
if dnew_best > dhat and np.abs(dnew_best - dhat) >= epsilon:
dnew_best = dhat
gnew_best = Gs_nearest[0].copy()
# number of edges to be changed.
# @todo: what if the log is negative? how to choose alpha (scalar)? seems fdgs is always 1.
# fdgs = dnew_best
fdgs = nb_updated_random + 1
if fdgs < 1:
fdgs = 1
fdgs = int(np.ceil(np.log(fdgs)))
if fdgs < 1:
fdgs += 1
# fdgs = nb_updated_random + 1 # @todo:
# @todo: should we use just half of the adjacency matrix for undirected graphs?
nb_vpairs = nx.number_of_nodes(gnew_best) * (nx.number_of_nodes(gnew_best) - 1)
l = 0
while l < l_max:
# add and delete edges.
gtemp = gnew_best.copy()
np.random.seed()
# which edges to change.
# @todo: what if fdgs is bigger than nb_vpairs?
idx_change = random.sample(range(nb_vpairs), fdgs if
fdgs < nb_vpairs else nb_vpairs)
# idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
# (nx.number_of_nodes(gs) - 1), fdgs)
for item in idx_change:
node1 = int(item / (nx.number_of_nodes(gtemp) - 1))
node2 = (item - node1 * (nx.number_of_nodes(gtemp) - 1))
if node2 >= node1: # skip the self pair.
node2 += 1
# @todo: is the randomness correct?
if not gtemp.has_edge(node1, node2):
gtemp.add_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
else:
gtemp.remove_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
# compute distance between \psi and the new generated graph.
knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
# @todo: the new distance is smaller or also equal?
if dnew < dnew_best or np.abs(dnew_best - dnew) < epsilon:
if np.abs(dnew_best - dnew) < epsilon:
print('I am equal!')
dnew_best = dnew
gnew_best = gtemp.copy()
else:
print('\nI am smaller!')
print('l =', str(l))
print(dnew_best, '->', dnew)
dis_gs = [dnew] + dis_gs # add the new nearest distances.
Gs_nearest = [gtemp.copy()] + Gs_nearest # add the corresponding graphs.
sort_idx = np.argsort(dis_gs)
dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
Gn_nearest_median = [g.copy() for g in Gs_nearest]
dhat = dnew
nb_updated_random += 1
found = True # found better graph.
r = 0
print('the graph is updated by random generation',
nb_updated_random, 'times.')
nx.draw(gtemp, labels=nx.get_node_attributes(gtemp, 'atom'),
with_labels=True)
## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
plt.show()
break
# nx.draw_networkx(gtemp)
# plt.show()
# print(gtemp.nodes(data=True))
# print(gtemp.edges(data=True))
l += 1
if l == l_max:
r += 1
else: # if the new distance is not equal to the old one.
dis_gs = dnew_list + dis_gs # add the new nearest distances.
Gs_nearest = [nx.convert_node_labels_to_integers(g).copy() for g
in g_tmp_list] + Gs_nearest # add the corresponding graphs.
sort_idx = np.argsort(dis_gs)
if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
print('We got new k nearest neighbors! Hurray!')
dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
# print(dis_gs[-1])
Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
if dnew_best < dhat:
print('I have smaller distance!')
print(str(dhat) + '->' + str(dis_gs[0]))
dhat = dis_gs[0]
idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
# for g in ghat_list:
## nx.draw_networkx(g)
## plt.show()
# draw_Letter_graph(g)
# print(g.nodes(data=True))
# print(g.edges(data=True))
r = 0
found = True
nb_updated_iam += 1
print('the graph is updated by IAM', nb_updated_iam, 'times.')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
plt.show()
else:
dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
Gn_nearest_median = [g.copy() for g in Gs_nearest]
if not found:
r += 1
# old_dis = cur_dis
# cur_dis = dnew_best
dis_list.append(dhat)
itr_total += 1
print('\nthe k shortest distances are', dis_gs)
print('the shortest distances for previous iterations are', dis_list)
print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
nb_updated_random, 'times.')
print('distances in kernel space:', dis_list, '\n')
return dhat, ghat_list, dis_list[-1], nb_updated_iam, nb_updated_random
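The random-generation branch above encodes a candidate edge flip as a flat index over ordered vertex pairs. A self-contained sketch of that decode (mirroring the node1/node2 arithmetic in the loop; the toy graph is an assumption):

import networkx as nx

def decode_pair(item, n):
    # flat index in [0, n*(n-1)) -> ordered pair (node1, node2), node1 != node2
    node1 = item // (n - 1)
    node2 = item - node1 * (n - 1)
    if node2 >= node1:   # skip the self pair, as in the loop above
        node2 += 1
    return node1, node2

g = nx.path_graph(4)                      # toy graph, nodes 0..3
n = nx.number_of_nodes(g)
pairs = [decode_pair(item, n) for item in range(n * (n - 1))]
print(pairs)                              # every ordered pair without (i, i)
assert len(set(pairs)) == n * (n - 1)     # the decode is a bijection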



###############################################################################
# useful functions.


def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
term1 = Kmatrix[idx_g, idx_g]
@@ -424,10 +726,10 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
def compute_kernel(Gn, graph_kernel, verbose):
if graph_kernel == 'marginalizedkernel':
Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
-p_quit=0.03, n_iteration=20, remove_totters=False,
+p_quit=0.03, n_iteration=10, remove_totters=False,
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'untilhpathkernel':
-Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type',
+Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
depth=10, k_func='MinMax', compute_method='trie',
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'spkernel':
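For reference, dis_gstar evaluates the kernel-space distance between a candidate graph and the weighted sum of the median graphs. A minimal numeric sketch, assuming the usual expansion d(g)^2 = k(g,g) - 2*sum_i alpha_i*k(g,g_i) + sum_{i,j} alpha_i*alpha_j*k(g_i,g_j); the Gram matrix and weights below are made up, and the term1/term2/term3 split mirrors the function:

import numpy as np

K = np.array([[1.0, 0.6, 0.4],
              [0.6, 1.0, 0.5],
              [0.4, 0.5, 1.0]])   # toy Gram matrix: row 0 is the candidate g
alpha = [0.5, 0.5]                # weights of the two median graphs
term1 = K[0, 0]
term2 = sum(a * K[0, 1 + i] for i, a in enumerate(alpha))
term3 = sum(a1 * a2 * K[1 + i1, 1 + i2]
            for i1, a1 in enumerate(alpha) for i2, a2 in enumerate(alpha))
print(np.sqrt(term1 - 2 * term2 + term3))   # kernel-space distance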


preimage/iam.py (+493 −368)

@@ -20,7 +20,424 @@ from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
#from pygraph.utils.utils import graph_deepcopy


def iam_moreGraphsAsInit_tryAllPossibleBestGraphs(Gn_median, Gn_candidate,
c_ei=3, c_er=3, c_es=1, ite_max=50, epsilon=0.001,
node_label='atom', edge_label='bond_type',
connected=False, removeNodes=True, AllBestInit=True,
params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP', 'saveGXL': 'benoit'}):
"""See my name, then you know what I do.
"""
from tqdm import tqdm
# Gn_median = Gn_median[0:10]
# Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
if removeNodes:
node_ir = np.inf # corresponds to node removal and insertion.
label_r = 'thanksdanny' # the label for node removal. # @todo: make this label unrepeatable.
ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
edge_label=edge_label)

def generate_graph(G, pi_p_forward, label_set):
G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
# nx.draw_networkx(G)
# import matplotlib.pyplot as plt
# plt.show()
# print(pi_p_forward)
# update vertex labels.
# pre-compute h_i0 for each label.
# for label in get_node_labels(Gn, node_label):
# print(label)
# for nd in G.nodes(data=True):
# pass
if not ds_attrs['node_attr_dim']: # labels are symbolic
for ndi, (nd, _) in enumerate(G.nodes(data=True)):
h_i0_list = []
label_list = []
for label in label_set:
h_i0 = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][ndi]
if pi_i != node_ir and g.nodes[pi_i][node_label] == label:
h_i0 += 1
h_i0_list.append(h_i0)
label_list.append(label)
# case when the node is to be removed.
if removeNodes:
h_i0_remove = 0 # @todo: maybe this can be added to the label_set above.
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][ndi]
if pi_i == node_ir:
h_i0_remove += 1
h_i0_list.append(h_i0_remove)
label_list.append(label_r)
# get the best labels.
idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
nlabel_best = [label_list[idx] for idx in idx_max]
# generate "best" graphs with regard to "best" node labels.
G_new_list_nd = []
for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
for nl in nlabel_best:
g_tmp = g.copy()
if nl == label_r:
g_tmp.remove_node(nd)
else:
g_tmp.nodes[nd][node_label] = nl
G_new_list_nd.append(g_tmp)
# nx.draw_networkx(g_tmp)
# import matplotlib.pyplot as plt
# plt.show()
# print(g_tmp.nodes(data=True))
# print(g_tmp.edges(data=True))
G_new_list = [ggg.copy() for ggg in G_new_list_nd]

else: # labels are non-symbolic
for ndi, (nd, _) in enumerate(G.nodes(data=True)):
Si_norm = 0
phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][ndi]
if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
Si_norm += 1
phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
phi_i_bar /= Si_norm
G_new_list[0].nodes[nd]['attributes'] = phi_i_bar
# for g in G_new_list:
# import matplotlib.pyplot as plt
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# update edge labels and adjacency matrix.
if ds_attrs['edge_labeled']:
G_new_list_edge = []
for g_new in G_new_list:
nd_list = [n for n in g_new.nodes()]
g_tmp_list = [g_new.copy()]
for nd1i in range(nx.number_of_nodes(g_new)):
nd1 = nd_list[nd1i]# @todo: not just edges, but all pairs of nodes
for nd2i in range(nd1i + 1, nx.number_of_nodes(g_new)):
nd2 = nd_list[nd2i]
# for nd1, nd2, _ in g_new.edges(data=True):
h_ij0_list = []
label_list = []
# @todo: compute edge label set before.
for label in get_edge_labels(Gn_median, edge_label):
h_ij0 = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][nd1i]
pi_j = pi_p_forward[idx][nd2i]
h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
g.has_edge(pi_i, pi_j) and
g.edges[pi_i, pi_j][edge_label] == label)
h_ij0 += h_ij0_p
h_ij0_list.append(h_ij0)
label_list.append(label)
# # case when the edge is to be removed.
# h_ij0_remove = 0
# for idx, g in enumerate(Gn_median):
# pi_i = pi_p_forward[idx][nd1i]
# pi_j = pi_p_forward[idx][nd2i]
# if g.has_node(pi_i) and g.has_node(pi_j) and not
# g.has_edge(pi_i, pi_j):
# h_ij0_remove += 1
# h_ij0_list.append(h_ij0_remove)
# label_list.append(label_r)
# get the best labels.
# choose all best graphs.
idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
elabel_best = [label_list[idx] for idx in idx_max]
h_ij0_max = [h_ij0_list[idx] for idx in idx_max]
# generate "best" graphs with regard to "best" node labels.
G_new_list_ed = []
for g_tmp in g_tmp_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
for idxl, el in enumerate(elabel_best):
g_tmp_copy = g_tmp.copy()
# check whether a_ij is 0 or 1.
sij_norm = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][nd1i]
pi_j = pi_p_forward[idx][nd2i]
if g.has_node(pi_i) and g.has_node(pi_j) and \
g.has_edge(pi_i, pi_j):
sij_norm += 1
if h_ij0_max[idxl] > len(Gn_median) * c_er / c_es + \
sij_norm * (1 - (c_er + c_ei) / c_es):
if not g_tmp_copy.has_edge(nd1, nd2):
g_tmp_copy.add_edge(nd1, nd2)
g_tmp_copy.edges[nd1, nd2][edge_label] = elabel_best[idxl]
else:
if g_tmp_copy.has_edge(nd1, nd2):
g_tmp_copy.remove_edge(nd1, nd2)
G_new_list_ed.append(g_tmp_copy)
g_tmp_list = [ggg.copy() for ggg in G_new_list_ed]
G_new_list_edge += g_tmp_list
G_new_list = [ggg.copy() for ggg in G_new_list_edge]
# # choose one of the best randomly.
# idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
# h_ij0_max = h_ij0_list[idx_max[0]]
# idx_rdm = random.randint(0, len(idx_max) - 1)
# best_label = label_list[idx_max[idx_rdm]]
#
# # check whether a_ij is 0 or 1.
# sij_norm = 0
# for idx, g in enumerate(Gn_median):
# pi_i = pi_p_forward[idx][nd1i]
# pi_j = pi_p_forward[idx][nd2i]
# if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
# sij_norm += 1
# if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
# if not g_new.has_edge(nd1, nd2):
# g_new.add_edge(nd1, nd2)
# g_new.edges[nd1, nd2][edge_label] = best_label
# else:
# if g_new.has_edge(nd1, nd2):
# g_new.remove_edge(nd1, nd2)
else: # if edges are unlabeled
# @todo: is this even right? G or g_tmp? check if the new one is right
# @todo: works only for undirected graphs.
for g_tmp in G_new_list:
nd_list = [n for n in g_tmp.nodes()]
for nd1i in range(nx.number_of_nodes(g_tmp)):
nd1 = nd_list[nd1i]
for nd2i in range(nd1i + 1, nx.number_of_nodes(g_tmp)):
nd2 = nd_list[nd2i]
sij_norm = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][nd1i]
pi_j = pi_p_forward[idx][nd2i]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
# @todo: should we consider if nd1 and nd2 in g_tmp?
# or just add the edge anyway?
if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
and not g_tmp.has_edge(nd1, nd2):
g_tmp.add_edge(nd1, nd2)
# else: # @todo: which to use?
elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
if g_tmp.has_edge(nd1, nd2):
g_tmp.remove_edge(nd1, nd2)
# do not change anything when equal.
# for i, g in enumerate(G_new_list):
# import matplotlib.pyplot as plt
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# # find the best graph generated in this iteration and update pi_p.
# @todo: should we update all graphs generated or just the best ones?
dis_list, pi_forward_list = median_distance(G_new_list, Gn_median,
**params_ged)
# @todo: should we remove the identical and connectivity check?
# Don't know which is faster.
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_new_list, idx_list = remove_duplicates(G_new_list)
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
dis_list = [dis_list[idx] for idx in idx_list]
# if connected == True:
# G_new_list, idx_list = remove_disconnected(G_new_list)
# pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
# idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
# dis_min = dis_list[idx_min_tmp_list[0]]
# pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
# G_new_list = [G_new_list[idx] for idx in idx_min_list]
# for g in G_new_list:
# import matplotlib.pyplot as plt
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
return G_new_list, pi_forward_list, dis_list
def best_median_graphs(Gn_candidate, pi_all_forward, dis_all):
idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
dis_min = dis_all[idx_min_list[0]]
pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
return G_min_list, pi_forward_min_list, dis_min
def iteration_proc(G, pi_p_forward, cur_sod):
G_list = [G]
pi_forward_list = [pi_p_forward]
old_sod = cur_sod * 2
sod_list = [cur_sod]
dis_list = [cur_sod]
# iterations.
itr = 0
# @todo: what if difference == 0?
# while itr < ite_max and (np.abs(old_sod - cur_sod) > epsilon or
# np.abs(old_sod - cur_sod) == 0):
while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon:
# for itr in range(0, 5): # the convergence condition?
print('itr_iam is', itr)
G_new_list = []
pi_forward_new_list = []
dis_new_list = []
for idx, g in enumerate(G_list):
label_set = get_node_labels(Gn_median + [g], node_label)
G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph(
g, pi_forward_list[idx], label_set)
G_new_list += G_tmp_list
pi_forward_new_list += pi_forward_tmp_list
dis_new_list += dis_tmp_list
# @todo: need to remove duplicates here?
G_list = [ggg.copy() for ggg in G_new_list]
pi_forward_list = [pitem.copy() for pitem in pi_forward_new_list]
dis_list = dis_new_list[:]
old_sod = cur_sod
cur_sod = np.min(dis_list)
sod_list.append(cur_sod)
itr += 1
# @todo: do we return all graphs or the best ones?
# get the best ones of the generated graphs.
G_list, pi_forward_list, dis_min = best_median_graphs(
G_list, pi_forward_list, dis_list)
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_list, idx_list = remove_duplicates(G_list)
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
# dis_list = [dis_list[idx] for idx in idx_list]
# import matplotlib.pyplot as plt
# for g in G_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
print('\nsods:', sod_list, '\n')
return G_list, pi_forward_list, dis_min
def remove_duplicates(Gn):
"""Remove duplicate graphs from list.
"""
Gn_new = []
idx_list = []
for idx, g in enumerate(Gn):
dupl = False
for g_new in Gn_new:
if graph_isIdentical(g_new, g):
dupl = True
break
if not dupl:
Gn_new.append(g)
idx_list.append(idx)
return Gn_new, idx_list
def remove_disconnected(Gn):
"""Remove disconnected graphs from list.
"""
Gn_new = []
idx_list = []
for idx, g in enumerate(Gn):
if nx.is_connected(g):
Gn_new.append(g)
idx_list.append(idx)
return Gn_new, idx_list

# phase 1: initialize.
# compute set-median.
dis_min = np.inf
dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median,
**params_ged)
# find all smallest distances.
if AllBestInit: # try all best init graphs.
idx_min_list = range(len(dis_list))
dis_min = dis_list
else:
idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
dis_min = [dis_list[idx_min_list[0]]] * len(idx_min_list)
# phase 2: iteration.
G_list = []
dis_list = []
pi_forward_list = []
for idx_tmp, idx_min in enumerate(idx_min_list):
# print('idx_min is', idx_min)
G = Gn_candidate[idx_min].copy()
# list of edit operations.
pi_p_forward = pi_forward_all[idx_min]
# pi_p_backward = pi_all_backward[idx_min]
Gi_list, pi_i_forward_list, dis_i_min = iteration_proc(G, pi_p_forward, dis_min[idx_tmp])
G_list += Gi_list
dis_list += [dis_i_min] * len(Gi_list)
pi_forward_list += pi_i_forward_list
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_list, idx_list = remove_duplicates(G_list)
dis_list = [dis_list[idx] for idx in idx_list]
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
if connected == True:
G_list_con, idx_list = remove_disconnected(G_list)
# if there are no connected graphs at all, keep the disconnected ones.
if len(G_list_con) > 0: # @todo: ??????????????????????????
G_list = G_list_con
dis_list = [dis_list[idx] for idx in idx_list]
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]


# import matplotlib.pyplot as plt
# for g in G_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# get the best median graphs
# dis_list, pi_forward_list = median_distance(G_list, Gn_median,
# **params_ged)
G_min_list, pi_forward_min_list, dis_min = best_median_graphs(
G_list, pi_forward_list, dis_list)
# for g in G_min_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# randomly choose one graph.
idx_rdm = random.randint(0, len(G_min_list) - 1)
G_min_list = [G_min_list[idx_rdm]]
return G_min_list, dis_min
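The unlabeled-edge rule above (sij_norm vs. len(Gn_median) * c_er / (c_er + c_ei)) reduces to a majority vote when c_er == c_ei. A toy numeric check with assumed values:

c_ei, c_er = 3, 3                    # default insertion/removal costs above
N = 10                               # number of median graphs
threshold = N * c_er / (c_er + c_ei) # = N / 2 with the default costs
for sij_norm in (4, 5, 6):           # medians whose maps realize the edge
    if sij_norm > threshold:
        print(sij_norm, '-> add/keep the edge')
    elif sij_norm < threshold:
        print(sij_norm, '-> remove the edge')
    else:
        print(sij_norm, '-> leave the edge unchanged')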
















###############################################################################
def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
connected=True):
"""See my name, then you know what I do.
@@ -148,27 +565,42 @@ def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
return G


-def GED(g1, g2, lib='gedlib'):
+def GED(g1, g2, lib='gedlib', cost='CHEM_1', method='IPFP', saveGXL='benoit',
+stabilizer='min'):
"""
Compute GED.
"""
if lib == 'gedlib':
# transform dataset to the 'xml' file as the GedLib required.
-saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp')
-# script.appel()
+saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp',
+xparams={'method': saveGXL})
+# script.appel()
script.PyRestartEnv()
script.PyLoadGXLGraph('ged_tmp/', 'ged_tmp/tmp.xml')
listID = script.PyGetGraphIds()
-script.PySetEditCost("LETTER") #("CHEM_1")
+script.PySetEditCost(cost) #("CHEM_1")
script.PyInitEnv()
-script.PySetMethod("IPFP", "")
+script.PySetMethod(method, "")
script.PyInitMethod()
g = listID[0]
h = listID[1]
-script.PyRunMethod(g, h)
-pi_forward, pi_backward = script.PyGetAllMap(g, h)
-upper = script.PyGetUpperBound(g, h)
-lower = script.PyGetLowerBound(g, h)
+if stabilizer == None:
+script.PyRunMethod(g, h)
+pi_forward, pi_backward = script.PyGetAllMap(g, h)
+upper = script.PyGetUpperBound(g, h)
+lower = script.PyGetLowerBound(g, h)
+elif stabilizer == 'min':
+upper = np.inf
+for itr in range(50):
+script.PyRunMethod(g, h)
+upper_tmp = script.PyGetUpperBound(g, h)
+if upper_tmp < upper:
+upper = upper_tmp
+pi_forward, pi_backward = script.PyGetAllMap(g, h)
+lower = script.PyGetLowerBound(g, h)
+if upper == 0:
+break
dis = upper
# make the map label correct (label remove map as np.inf)
@@ -177,12 +609,13 @@ def GED(g1, g2, lib='gedlib'):
nb1 = nx.number_of_nodes(g1)
nb2 = nx.number_of_nodes(g2)
pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
return dis, pi_forward, pi_backward
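The new stabilizer='min' option exploits the randomness of IPFP by re-running it and keeping the tightest upper bound. A self-contained sketch of that pattern; run_once is a stand-in, since gedlib's script API is not imported here:

import random

def run_once():
    # stand-in for script.PyRunMethod + PyGetUpperBound (gedlib not imported)
    return random.uniform(2.0, 6.0)

def stabilized_upper_bound(repeats=50):
    upper = float('inf')
    for _ in range(repeats):
        upper_tmp = run_once()
        if upper_tmp < upper:
            upper = upper_tmp        # keep the tightest upper bound seen
        if upper == 0:
            break                    # cannot improve on an exact match
    return upper

print(stabilized_upper_bound())      # close to 2.0 after 50 random restarts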




-def median_distance(Gn, Gn_median, measure='ged', verbose=False):
+def median_distance(Gn, Gn_median, measure='ged', verbose=False,
+ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
dis_list = []
pi_forward_list = []
for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
@@ -190,7 +623,8 @@ def median_distance(Gn, Gn_median, measure='ged', verbose=False):
dis_sum = 0
pi_forward_list.append([])
for G_p in Gn_median:
-dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
+dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
+cost=ged_cost, method=ged_method, saveGXL=saveGXL)
pi_forward_list[idx].append(pi_tmp_forward)
dis_sum += dis_tmp
dis_list.append(dis_sum)
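median_distance accumulates, for each candidate G, the sum of its GEDs to all median graphs (the SOD the set-median minimizes). A tiny sketch with a hypothetical ged_stub in place of the real GED call:

def ged_stub(a, b):
    return abs(a - b)     # hypothetical stand-in; the real code calls GED(...)

def sod(candidate, medians):
    # sum of distances (SOD) accumulated exactly like dis_sum above
    return sum(ged_stub(candidate, m) for m in medians)

medians = [1.0, 2.0, 4.0]
for cand in (1.0, 2.0, 3.0):
    print(cand, sod(cand, medians))   # the set-median minimizes this sum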
@@ -228,137 +662,13 @@ def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1,
# list of edit operations.
pi_p_forward = pi_all_forward[idx_min]
pi_p_backward = pi_all_backward[idx_min]
-# phase 2: iteration.
-ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
-edge_label=edge_label)
-label_set = get_node_labels(Gn + [G], node_label)
-for itr in range(0, 10): # @todo: the convergence condition?
-G_new = G.copy()
-# update vertex labels.
-# pre-compute h_i0 for each label.
-# for label in get_node_labels(Gn, node_label):
-# print(label)
-# for nd in G.nodes(data=True):
-# pass
-if not ds_attrs['node_attr_dim']: # labels are symbolic
-for nd in G.nodes():
-h_i0_list = []
-label_list = []
-for label in label_set:
-h_i0 = 0
-for idx, g in enumerate(Gn):
-pi_i = pi_p_forward[idx][nd]
-if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
-h_i0 += 1
-h_i0_list.append(h_i0)
-label_list.append(label)
-# choose one of the best randomly.
-idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
-idx_rdm = random.randint(0, len(idx_max) - 1)
-G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
-else: # labels are non-symbolic
-for nd in G.nodes():
-Si_norm = 0
-phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
-for idx, g in enumerate(Gn):
-pi_i = pi_p_forward[idx][nd]
-if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
-Si_norm += 1
-phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
-phi_i_bar /= Si_norm
-G_new.nodes[nd]['attributes'] = phi_i_bar
-# update edge labels and adjacency matrix.
-if ds_attrs['edge_labeled']:
-for nd1, nd2, _ in G.edges(data=True):
-h_ij0_list = []
-label_list = []
-for label in get_edge_labels(Gn, edge_label):
-h_ij0 = 0
-for idx, g in enumerate(Gn):
-pi_i = pi_p_forward[idx][nd1]
-pi_j = pi_p_forward[idx][nd2]
-h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
-g.has_edge(pi_i, pi_j) and
-g.edges[pi_i, pi_j][edge_label] == label)
-h_ij0 += h_ij0_p
-h_ij0_list.append(h_ij0)
-label_list.append(label)
-# choose one of the best randomly.
-idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
-h_ij0_max = h_ij0_list[idx_max[0]]
-idx_rdm = random.randint(0, len(idx_max) - 1)
-best_label = label_list[idx_max[idx_rdm]]
-# check whether a_ij is 0 or 1.
-sij_norm = 0
-for idx, g in enumerate(Gn):
-pi_i = pi_p_forward[idx][nd1]
-pi_j = pi_p_forward[idx][nd2]
-if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-sij_norm += 1
-if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
-if not G_new.has_edge(nd1, nd2):
-G_new.add_edge(nd1, nd2)
-G_new.edges[nd1, nd2][edge_label] = best_label
-else:
-if G_new.has_edge(nd1, nd2):
-G_new.remove_edge(nd1, nd2)
-else: # if edges are unlabeled
-# @todo: works only for undirected graphs.
-for nd1 in range(nx.number_of_nodes(G)):
-for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
-sij_norm = 0
-for idx, g in enumerate(Gn):
-pi_i = pi_p_forward[idx][nd1]
-pi_j = pi_p_forward[idx][nd2]
-if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-sij_norm += 1
-if sij_norm > len(Gn) * c_er / (c_er + c_ei):
-if not G_new.has_edge(nd1, nd2):
-G_new.add_edge(nd1, nd2)
-elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
-if G_new.has_edge(nd1, nd2):
-G_new.remove_edge(nd1, nd2)
-# do not change anything when equal.
-G = G_new.copy()
-# update pi_p
-pi_p_forward = []
-for G_p in Gn:
-dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
-pi_p_forward.append(pi_tmp_forward)
-return G

-def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
-Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, node_label='atom',
-edge_label='bond_type', connected=False):
-"""See my name, then you know what I do.
-"""
-from tqdm import tqdm
-# Gn_median = Gn_median[0:10]
-# Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
-node_ir = np.inf # corresponding to the node remove and insertion.
-label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable.
-ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
-attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
+# phase 2: iteration.
+ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
edge_label=edge_label)
-ite_max = 50
-epsilon = 0.001

-def generate_graph(G, pi_p_forward, label_set):
-G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
-# nx.draw_networkx(G)
-# import matplotlib.pyplot as plt
-# plt.show()
-# print(pi_p_forward)
+label_set = get_node_labels(Gn + [G], node_label)
+for itr in range(0, 10): # @todo: the convergence condition?
+G_new = G.copy()
# update vertex labels.
# pre-compute h_i0 for each label.
# for label in get_node_labels(Gn, node_label):
@@ -366,65 +676,41 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
# for nd in G.nodes(data=True):
# pass
if not ds_attrs['node_attr_dim']: # labels are symbolic
-for ndi, (nd, _) in enumerate(G.nodes(data=True)):
+for nd in G.nodes():
h_i0_list = []
label_list = []
for label in label_set:
h_i0 = 0
-for idx, g in enumerate(Gn_median):
-pi_i = pi_p_forward[idx][ndi]
-if pi_i != node_ir and g.nodes[pi_i][node_label] == label:
+for idx, g in enumerate(Gn):
+pi_i = pi_p_forward[idx][nd]
+if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
h_i0 += 1
h_i0_list.append(h_i0)
label_list.append(label)
-# case when the node is to be removed.
-h_i0_remove = 0
-for idx, g in enumerate(Gn_median):
-pi_i = pi_p_forward[idx][ndi]
-if pi_i == node_ir:
-h_i0_remove += 1
-h_i0_list.append(h_i0_remove)
-label_list.append(label_r)
-# get the best labels.
+# choose one of the best randomly.
idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
-nlabel_best = [label_list[idx] for idx in idx_max]
-# generate "best" graphs with regard to "best" node labels.
-G_new_list_nd = []
-for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
-for nl in nlabel_best:
-g_tmp = g.copy()
-if nl == label_r:
-g_tmp.remove_node(nd)
-else:
-g_tmp.nodes[nd][node_label] = nl
-G_new_list_nd.append(g_tmp)
-# nx.draw_networkx(g_tmp)
-# import matplotlib.pyplot as plt
-# plt.show()
-# print(g_tmp.nodes(data=True))
-# print(g_tmp.edges(data=True))
-G_new_list = G_new_list_nd[:]

+idx_rdm = random.randint(0, len(idx_max) - 1)
+G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
else: # labels are non-symbolic
-for ndi, (nd, _) in enumerate(G.nodes(data=True)):
+for nd in G.nodes():
Si_norm = 0
phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
-for idx, g in enumerate(Gn_median):
-pi_i = pi_p_forward[idx][ndi]
+for idx, g in enumerate(Gn):
+pi_i = pi_p_forward[idx][nd]
if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
Si_norm += 1
phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
phi_i_bar /= Si_norm
-G_new_list[0].nodes[nd]['attributes'] = phi_i_bar
+G_new.nodes[nd]['attributes'] = phi_i_bar
# update edge labels and adjacency matrix.
if ds_attrs['edge_labeled']:
for nd1, nd2, _ in G.edges(data=True):
h_ij0_list = []
label_list = []
-for label in get_edge_labels(Gn_median, edge_label):
+for label in get_edge_labels(Gn, edge_label):
h_ij0 = 0
-for idx, g in enumerate(Gn_median):
+for idx, g in enumerate(Gn):
pi_i = pi_p_forward[idx][nd1]
pi_j = pi_p_forward[idx][nd2]
h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
@@ -441,12 +727,12 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
# check whether a_ij is 0 or 1.
sij_norm = 0
-for idx, g in enumerate(Gn_median):
+for idx, g in enumerate(Gn):
pi_i = pi_p_forward[idx][nd1]
pi_j = pi_p_forward[idx][nd2]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
-if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
+if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
if not G_new.has_edge(nd1, nd2):
G_new.add_edge(nd1, nd2)
G_new.edges[nd1, nd2][edge_label] = best_label
@@ -455,197 +741,36 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
G_new.remove_edge(nd1, nd2) G_new.remove_edge(nd1, nd2)
else: # if edges are unlabeled else: # if edges are unlabeled
# @todo: works only for undirected graphs. # @todo: works only for undirected graphs.
nd_list = [n for n in G.nodes()]
for g_tmp in G_new_list:
for nd1i in range(nx.number_of_nodes(G)):
nd1 = nd_list[nd1i]
for nd2i in range(nd1i + 1, nx.number_of_nodes(G)):
nd2 = nd_list[nd2i]
sij_norm = 0
for idx, g in enumerate(Gn_median):
pi_i = pi_p_forward[idx][nd1i]
pi_j = pi_p_forward[idx][nd2i]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
# @todo: should we consider if nd1 and nd2 in g_tmp?
# or just add the edge anyway?
if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
and not g_tmp.has_edge(nd1, nd2):
g_tmp.add_edge(nd1, nd2)
elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
if g_tmp.has_edge(nd1, nd2):
g_tmp.remove_edge(nd1, nd2)
# do not change anything when equal.
# # find the best graph generated in this iteration and update pi_p.
# @todo: should we update all graphs generated or just the best ones?
dis_list, pi_forward_list = median_distance(G_new_list, Gn_median)
# @todo: should we remove the identical and connectivity check?
# Don't know which is faster.
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_new_list, idx_list = remove_duplicates(G_new_list)
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
dis_list = [dis_list[idx] for idx in idx_list]
# if connected == True:
# G_new_list, idx_list = remove_disconnected(G_new_list)
# pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
# idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
# dis_min = dis_list[idx_min_tmp_list[0]]
# pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
# G_new_list = [G_new_list[idx] for idx in idx_min_list]
# for g in G_new_list:
# import matplotlib.pyplot as plt
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
return G_new_list, pi_forward_list, dis_list
def best_median_graphs(Gn_candidate, pi_all_forward, dis_all):
idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
dis_min = dis_all[idx_min_list[0]]
pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
return G_min_list, pi_forward_min_list, dis_min
def iteration_proc(G, pi_p_forward, cur_sod):
G_list = [G]
pi_forward_list = [pi_p_forward]
old_sod = cur_sod * 2
sod_list = [cur_sod]
# iterations.
itr = 0
while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon:
# for itr in range(0, 5): # the convergence condition?
print('itr is', itr)
G_new_list = []
pi_forward_new_list = []
dis_new_list = []
for idx, G in enumerate(G_list):
label_set = get_node_labels(Gn_median + [G], node_label)
G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph(
G, pi_forward_list[idx], label_set)
G_new_list += G_tmp_list
pi_forward_new_list += pi_forward_tmp_list
dis_new_list += dis_tmp_list
G_list = G_new_list[:]
pi_forward_list = pi_forward_new_list[:]
dis_list = dis_new_list[:]
old_sod = cur_sod
cur_sod = np.min(dis_list)
sod_list.append(cur_sod)
itr += 1
# @todo: do we return all graphs or the best ones?
# get the best ones of the generated graphs.
G_list, pi_forward_list, dis_min = best_median_graphs(
G_list, pi_forward_list, dis_list)
for nd1 in range(nx.number_of_nodes(G)):
for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
sij_norm = 0
for idx, g in enumerate(Gn):
pi_i = pi_p_forward[idx][nd1]
pi_j = pi_p_forward[idx][nd2]
if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
sij_norm += 1
if sij_norm > len(Gn) * c_er / (c_er + c_ei):
if not G_new.has_edge(nd1, nd2):
G_new.add_edge(nd1, nd2)
elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
if G_new.has_edge(nd1, nd2):
G_new.remove_edge(nd1, nd2)
# do not change anything when equal.
G = G_new.copy()
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_list, idx_list = remove_duplicates(G_list)
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
# dis_list = [dis_list[idx] for idx in idx_list]
# import matplotlib.pyplot as plt
# for g in G_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
print('\nsods:', sod_list, '\n')
return G_list, pi_forward_list, dis_min
def remove_duplicates(Gn):
"""Remove duplicate graphs from list.
"""
Gn_new = []
idx_list = []
for idx, g in enumerate(Gn):
dupl = False
for g_new in Gn_new:
if graph_isIdentical(g_new, g):
dupl = True
break
if not dupl:
Gn_new.append(g)
idx_list.append(idx)
return Gn_new, idx_list
# update pi_p
pi_p_forward = []
for G_p in Gn:
dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
pi_p_forward.append(pi_tmp_forward)
def remove_disconnected(Gn):
"""Remove disconnected graphs from list.
"""
Gn_new = []
idx_list = []
for idx, g in enumerate(Gn):
if nx.is_connected(g):
Gn_new.append(g)
idx_list.append(idx)
return Gn_new, idx_list
return G


###############################################################################


# phase 1: initilize.
# compute set-median.
dis_min = np.inf
dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median)
# find all smallest distances.
idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
dis_min = dis_list[idx_min_list[0]]
# phase 2: iteration.
G_list = []
dis_list = []
pi_forward_list = []
for idx_min in idx_min_list:
# print('idx_min is', idx_min)
G = Gn_candidate[idx_min].copy()
# list of edit operations.
pi_p_forward = pi_forward_all[idx_min]
# pi_p_backward = pi_all_backward[idx_min]
Gi_list, pi_i_forward_list, dis_i_min = iteration_proc(G, pi_p_forward, dis_min)
G_list += Gi_list
dis_list.append(dis_i_min)
pi_forward_list += pi_i_forward_list
if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
G_list, idx_list = remove_duplicates(G_list)
dis_list = [dis_list[idx] for idx in idx_list]
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
if connected == True:
G_list_con, idx_list = remove_disconnected(G_list)
# if there is no connected graphs at all, then remain the disconnected ones.
if len(G_list_con) > 0: # @todo: ??????????????????????????
G_list = G_list_con
dis_list = [dis_list[idx] for idx in idx_list]
pi_forward_list = [pi_forward_list[idx] for idx in idx_list]


# import matplotlib.pyplot as plt
# for g in G_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# get the best median graphs
# dis_list, pi_forward_list = median_distance(G_list, Gn_median)
G_min_list, pi_forward_min_list, dis_min = best_median_graphs(
G_list, pi_forward_list, dis_list)
# for g in G_min_list:
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
return G_min_list, dis_min
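# Phase 1 above is a set-median computation: each candidate's sum of distances
# (SOD) to the median set is evaluated, and every candidate attaining the
# minimum is kept as a starting point for phase 2. A rough sketch of that
# selection, assuming dis_list holds the SOD values returned by a
# median_distance-like routine:
#
# import numpy as np
#
# def select_set_medians(Gn_candidate, dis_list):
#     # keep every candidate whose SOD ties the minimum
#     idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
#     return ([Gn_candidate[i].copy() for i in idx_min_list],
#             dis_list[idx_min_list[0]])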




if __name__ == '__main__':


+ 29
- 29
preimage/median.py View File

@@ -5,10 +5,10 @@ import numpy as np
import networkx as nx
import time
#import librariesImport
#import script
#sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
#import pygraph
import librariesImport
import script
sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
import pygraph
from pygraph.utils.graphfiles import loadDataset
def replace_graph_in_env(script, graph, old_id, label='median'):
@@ -191,28 +191,28 @@ def compute_median_set(script,listID):
return median_set_index, sod
#if __name__ == "__main__":
# # Load the dataset
# script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
# script.PySetEditCost("LETTER")
# script.PyInitEnv()
# script.PySetMethod("IPFP", "")
# script.PyInitMethod()
#
# dataset,my_y = pygraph.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
#
# listID = script.PyGetAllGraphIds()
# median, sod = compute_median(script,listID,dataset,verbose=True)
#
# print(sod)
# draw_Letter_graph(median)
if __name__ == '__main__':
# test draw_Letter_graph
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
print(y_all)
for g in Gn:
draw_Letter_graph(g)
if __name__ == "__main__":
# Load the dataset
script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
script.PySetEditCost("LETTER")
script.PyInitEnv()
script.PySetMethod("IPFP", "")
script.PyInitMethod()
dataset,my_y = pygraph.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
listID = script.PyGetAllGraphIds()
median, sod = compute_median(script,listID,dataset,verbose=True)
print(sod)
draw_Letter_graph(median)
#if __name__ == '__main__':
# # test draw_Letter_graph
# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
# 'extra_params': {}} # node nsymb
# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# print(y_all)
# for g in Gn:
# draw_Letter_graph(g)

+ 152
- 17
preimage/preimage.py View File

@@ -25,14 +25,16 @@ import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from pygraph.kernels.structuralspKernel import structuralspkernel


from gk_iam import dis_gstar



def compute_kernel(Gn, graph_kernel, verbose):
if graph_kernel == 'marginalizedkernel':
Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
p_quit=0.03, n_iteration=20, remove_totters=False,
p_quit=0.03, n_iteration=10, remove_totters=False,
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'untilhpathkernel':
Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type',
Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
depth=10, k_func='MinMax', compute_method='trie',
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'spkernel':
@@ -47,34 +49,167 @@ def compute_kernel(Gn, graph_kernel, verbose):
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
# normalization
# Kmatrix_diag = Kmatrix.diagonal().copy()
# for i in range(len(Kmatrix)):
# for j in range(i, len(Kmatrix)):
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
# Kmatrix[j][i] = Kmatrix[i][j]
Kmatrix_diag = Kmatrix.diagonal().copy()
for i in range(len(Kmatrix)):
for j in range(i, len(Kmatrix)):
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
Kmatrix[j][i] = Kmatrix[i][j]
return Kmatrix
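# The normalization activated above is the usual cosine normalization of a
# Gram matrix, K'(i, j) = K(i, j) / sqrt(K(i, i) * K(j, j)), which puts every
# self-kernel at 1. An equivalent vectorized sketch:
#
# import numpy as np
#
# def normalize_gram(Kmatrix):
#     d = np.sqrt(Kmatrix.diagonal().copy())
#     return Kmatrix / np.outer(d, d)  # K'[i, j] = K[i, j] / (d[i] * d[j])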




def random_preimage(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
# compute k nearest neighbors of phi in DN.
dis_list = [] # distance between g_star and each graph.
term3 = 0
for i1, a1 in enumerate(alpha):
for i2, a2 in enumerate(alpha):
term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
dis_list.append(dtemp)
# print(np.max(dis_list))
# print(np.min(dis_list))
# print(np.min([item for item in dis_list if item != 0]))
# print(np.mean(dis_list))
# sort
sort_idx = np.argsort(dis_list)
dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
if dis_gs[0] == 0: # the exact pre-image.
print('The exact pre-image is found from the input dataset.')
return 0, g0hat_list[0], 0
dhat = dis_gs[0] # the nearest distance
# ghat_list = [g.copy() for g in g0hat_list]
# for g in ghat_list:
# draw_Letter_graph(g)
# nx.draw_networkx(g)
# plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
# for gi in Gk:
## nx.draw_networkx(gi)
## plt.show()
# draw_Letter_graph(g)
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
Gs_nearest = [g.copy() for g in Gk]
gihat_list = []
dihat_list = []
# i = 1
r = 0
# sod_list = [dhat]
# found = False
nb_updated = 0
g_best = []
while r < r_max:
print('\nr =', r)
print('itr for gk =', nb_updated, '\n')
found = False
dis_bests = dis_gs + dihat_list
# @todo what if the log is negative? how to choose alpha (scalar)?
fdgs_list = np.array(dis_bests)
if np.min(fdgs_list) < 1:
fdgs_list /= np.min(dis_bests)
fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
if np.min(fdgs_list) < 1:
fdgs_list = np.array(fdgs_list) + 1
for ig, gs in enumerate(Gs_nearest + gihat_list):
# nx.draw_networkx(gs)
# plt.show()
for trial in range(0, l):
# for trial in tqdm(range(0, l), desc='l loops', file=sys.stdout):
# add and delete edges.
gtemp = gs.copy()
np.random.seed()
# which edges to change.
# @todo: should we use just half of the adjacency matrix for undirected graphs?
nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
# @todo: what if fdgs is bigger than nb_vpairs?
idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if
fdgs_list[ig] < nb_vpairs else nb_vpairs)
# idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
# (nx.number_of_nodes(gs) - 1), fdgs)
for item in idx_change:
node1 = int(item / (nx.number_of_nodes(gs) - 1))
node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
if node2 >= node1: # skip the self pair.
node2 += 1
# @todo: is the randomness correct?
if not gtemp.has_edge(node1, node2):
gtemp.add_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
else:
gtemp.remove_edge(node1, node2)
# nx.draw_networkx(gs)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
# nx.draw_networkx(gtemp)
# plt.show()
# compute distance between \psi and the new generated graph.
# knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
# p_quit=lmbda, n_iteration=20, remove_totters=False,
# n_jobs=multiprocessing.cpu_count(), verbose=False)
knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
if dnew <= dhat: # @todo: the new distance is smaller or also equal?
if dnew < dhat:
print('\nI am smaller!')
print('ig =', str(ig), ', l =', str(trial))
print(dhat, '->', dnew)
nb_updated += 1
elif dnew == dhat:
print('I am equal!')
# nx.draw_networkx(gtemp)
# plt.show()
# print(gtemp.nodes(data=True))
# print(gtemp.edges(data=True))
dhat = dnew
gnew = gtemp.copy()
found = True # found better graph.
if found:
r = 0
gihat_list = [gnew]
dihat_list = [dhat]
else:
r += 1
# dis_best.append(dhat)
g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
return dhat, g_best, nb_updated
# return 0, 0, 0
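# The index arithmetic in the trial loop enumerates the
# nb_vpairs = n * (n - 1) ordered node pairs without self-loops:
# node1 = item // (n - 1), and the remainder is shifted by one whenever it
# would collide with node1. A small self-contained check of that decoding:
#
# def decode_vertex_pair(item, n):
#     # map an index in [0, n * (n - 1)) to an ordered pair of distinct nodes
#     node1 = item // (n - 1)
#     node2 = item % (n - 1)
#     if node2 >= node1:  # skip the self pair
#         node2 += 1
#     return node1, node2
#
# # for n = 3, the six indices decode to all ordered pairs of distinct nodes:
# assert [decode_vertex_pair(i, 3) for i in range(6)] == \
#     [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]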


if __name__ == '__main__':
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
# 'extra_params': {}} # node nsymb
# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
# 'extra_params': {}}
ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
'extra_params': {}} # node symb
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
# 'extra_params': {}}
# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'extra_params': {}} # node symb
DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
#DN = DN[0:10]
lmbda = 0.03 # termination probability
r_max = 10 # recursions
r_max = 3 # 10 # iteration limit.
l = 500
alpha_range = np.linspace(0.5, 0.5, 1)
#alpha_range = np.linspace(0.1, 0.9, 9)
k = 5 # k nearest neighbors
k = 10 # 5 # k nearest neighbors
# randomly select two molecules
#np.random.seed(1)


+ 298
- 17
preimage/run_gk_iam.py View File

@@ -245,6 +245,9 @@ def test_remove_bests(Gn, gkernel):
print(g.edges(data=True))
###############################################################################
# Tests on dataset Letter-H.
def test_gkiam_letter_h():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
@@ -263,8 +266,10 @@ def test_gkiam_letter_h():
# classify graphs according to letters.
idx_dict = get_same_item_indices(y_all)
time_list = []
sod_list = []
sod_min_list = []
sod_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
for letter in idx_dict:
print('\n-------------------------------------------------------\n')
Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
@@ -280,10 +285,10 @@ def test_gkiam_letter_h():
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
dhat, ghat_list = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let),
range(len(Gn_let), len(Gn_mix)), km,
k, r_max, gkernel, c_ei=1.7,
c_er=1.7, c_es=1.7)
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let,
Gn_let, [alpha] * len(Gn_let), range(len(Gn_let), len(Gn_mix)),
km, k, r_max, gkernel, c_ei=1.7, c_er=1.7, c_es=1.7,
ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
dis_best.append(dhat)
g_best.append(ghat_list)
time_list.append(time.time() - time0)
@@ -300,13 +305,18 @@ def test_gkiam_letter_h():
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let)
sod_list.append(sod_tmp)
sod_min_list.append(np.min(sod_tmp))
sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER',
ged_method='IPFP', saveGXL='gedlib-letter')
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
sod_ks_min_list.append(sod_ks)
nb_updated_list.append(nb_updated)
print('\nsods in graph space: ', sod_list)
print('\nsmallest sod in graph space for each letter: ', sod_min_list)
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
print('\nnumber of updates for each letter: ', nb_updated_list)
print('\ntimes:', time_list)
@@ -356,7 +366,8 @@ def test_iam_letter_h():
for alpha in alpha_range:
print('alpha =', alpha)
ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7)
Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7,
ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
dis_best.append(dhat)
g_best.append(ghat_list)
time_list.append(time.time() - time0)
@@ -388,18 +399,283 @@ def test_iam_letter_h():
print('\nsods in kernel space: ', sod_list)
print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
print('\ntimes:', time_list)
def test_random_preimage_letter_h():
from preimage import random_preimage, compute_kernel
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
# 'extra_params': {}} # node nsymb
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
# 'extra_params': {}}
# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'extra_params': {}} # node symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
gkernel = 'structuralspkernel'
# lmbda = 0.03 # termination probability
r_max = 3 # 10 # recursions
l = 500
# alpha_range = np.linspace(0.5, 0.5, 1)
#alpha_range = np.linspace(0.1, 0.9, 9)
k = 10 # 5 # k nearest neighbors
# classify graphs according to letters.
idx_dict = get_same_item_indices(y_all)
time_list = []
sod_list = []
sod_min_list = []
for letter in idx_dict:
print('\n-------------------------------------------------------\n')
Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
Gn_mix = Gn_let + [g.copy() for g in Gn_let]
alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
# compute
time0 = time.time()
km = compute_kernel(Gn_mix, gkernel, True)
g_best = []
dis_best = []
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
dhat, ghat, nb_updated = random_preimage(Gn_let, Gn_let,
[alpha] * len(Gn_let), range(len(Gn_let), len(Gn_mix)),
km, k, r_max, l, gkernel)
dis_best.append(dhat)
g_best.append([ghat])
time_list.append(time.time() - time0)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-images are')
for g in g_best[idx]:
draw_Letter_graph(g, savepath='results/gk_iam/')
# nx.draw_networkx(g)
# plt.show()
print(g.nodes(data=True))
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let)
sod_list.append(sod_tmp)
sod_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_list)
print('\nsmallest sod in graph space for each letter: ', sod_min_list)
print('\ntimes:', time_list)

def test_gkiam_mutag():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
# 'extra_params': {}} # node nsymb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
gkernel = 'structuralspkernel'
lmbda = 0.03 # termination probability
r_max = 3 # recursions
# alpha_range = np.linspace(0.5, 0.5, 1)
k = 20 # k nearest neighbors
# classify graphs according to letters.
idx_dict = get_same_item_indices(y_all)
time_list = []
sod_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
for letter in idx_dict:
print('\n-------------------------------------------------------\n')
Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
Gn_mix = Gn_let + [g.copy() for g in Gn_let]
alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
# compute
time0 = time.time()
km = compute_kernel(Gn_mix, gkernel, True)
g_best = []
dis_best = []
# for each alpha
for alpha in alpha_range:
print('alpha =', alpha)
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let),
range(len(Gn_let), len(Gn_mix)), km,
k, r_max, gkernel, c_ei=1.7,
c_er=1.7, c_es=1.7)
dis_best.append(dhat)
g_best.append(ghat_list)
time_list.append(time.time() - time0)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_best[idx])
print('the corresponding pre-images are')
for g in g_best[idx]:
draw_Letter_graph(g, savepath='results/gk_iam/')
# nx.draw_networkx(g)
# plt.show()
print(g.nodes(data=True))
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
sod_ks_min_list.append(sod_ks)
nb_updated_list.append(nb_updated)
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
print('\nnumber of updates for each letter: ', nb_updated_list)
print('\ntimes:', time_list)
###############################################################################
# Re-test.
def retest_the_simple_two():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from test_random_mutag import remove_edges
# The two simple graphs.
# g1 = nx.Graph(name='haha')
# g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
# g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
# g2 = nx.Graph(name='hahaha')
# g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
# (3, {'atom': 'O'}), (4, {'atom': 'C'})])
# g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
# (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
g1 = nx.Graph(name='haha')
g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
(3, {'atom': 'S'}), (4, {'atom': 'S'})])
g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
(2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
g2 = nx.Graph(name='hahaha')
g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
(3, {'atom': 'O'}), (4, {'atom': 'O'})])
g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
(2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
# # randomly select two molecules
# np.random.seed(1)
# idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
# g1 = Gn[idx_gi[0]]
# g2 = Gn[idx_gi[1]]
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
Gn = [g1.copy(), g2.copy()]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 10 # recursions
# l = 500
alpha_range = np.linspace(0.5, 0.5, 1)
k = 2 # k nearest neighbors
epsilon = 1e-6
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
c_ei=1
c_er=1
c_es=1
Gn_mix = Gn + [g1.copy(), g2.copy()]
# compute
time0 = time.time()
km = compute_kernel(Gn_mix, gkernel, True)
time_km = time.time() - time0

time_list = []
sod_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon,
ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
sod_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', sod_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
print(g_best[idx][0].nodes(data=True))
print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest sod in kernel space for each alpha: ', sod_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)


if __name__ == '__main__':
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
# 'extra_params': {}} # node nsymb
# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
# 'extra_params': {}}
# ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'extra_params': {}} # node symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:20]
# import networkx.algorithms.isomorphism as iso
@@ -419,5 +695,10 @@ if __name__ == '__main__':
# test_the_simple_two(Gn, 'untilhpathkernel')
# test_remove_bests(Gn, 'untilhpathkernel')
test_gkiam_letter_h()
# test_iam_letter_h()
# test_gkiam_letter_h()
# test_iam_letter_h()
# test_random_preimage_letter_h
###############################################################################
# retests.
retest_the_simple_two()

+ 11
- 11
preimage/test.py View File

@@ -18,17 +18,17 @@ def test() :
script.PyRestartEnv()
# print("Here is the Python function !")
#
# print("List of Edit Cost Options : ")
# for i in script.listOfEditCostOptions :
# print (i)
# print("")
#
# print("List of Method Options : ")
# for j in script.listOfMethodOptions :
# print (j)
# print("")
print("Here is the Python function !")
print("List of Edit Cost Options : ")
for i in script.listOfEditCostOptions :
print (i)
print("")
print("List of Method Options : ")
for j in script.listOfMethodOptions :
print (j)
print("")
script.PyLoadGXLGraph('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml')
listID = script.PyGetGraphIds()


+ 599
- 0
preimage/test_random_mutag.py View File

@@ -0,0 +1,599 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
from tqdm import tqdm

import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset

###############################################################################
# test on the combination of two randomly chosen graphs. (the same setting as
# in the random pre-image paper.)

def test_preimage_mix_2combination_all_pairs():
from gk_iam import preimage_iam_random_mix, compute_kernel
from iam import median_distance
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
l_max = 500 # update limit for random generation
alpha_range = np.linspace(0.7, 1, 4)
k = 5 # k nearest neighbors
epsilon = 1e-6
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
# test on each pair of graphs.
# for idx1 in range(len(Gn) - 1, -1, -1):
# for idx2 in range(idx1, -1, -1):
for idx1 in range(187, 188):
for idx2 in range(167, 168):
g1 = Gn[idx1].copy()
g2 = Gn[idx2].copy()
# Gn[10] = []
# Gn[10] = []
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
plt.savefig("results/preimage_mix/mutag187.png", format="PNG")
plt.show()
plt.clf()
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
plt.savefig("results/preimage_mix/mutag167.png", format="PNG")
plt.show()
plt.clf()

###################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
#
# # write Gram matrix to file and read it.
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################
# # use only the two graphs in median set as candidates.
# Gn = [g1.copy(), g2.copy()]
# Gn_mix = Gn + [g1.copy(), g2.copy()]
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list_iam = []
nb_updated_list_random = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, sod_ks, nb_updated_iam, nb_updated_random = \
preimage_iam_random_mix(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
l_max, gkernel, epsilon=epsilon,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list_iam.append(nb_updated_iam)
nb_updated_list_random.append(nb_updated_random)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
+ '_alpha' + str(item) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(g_best[idx][0].nodes(data=True))
# print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates for each alpha by IAM: ', nb_updated_list_iam)
print('\nnumber of updates for each alpha by random generation: ',
nb_updated_list_random)
print('\ntimes:', time_list)
nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
% (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
with open('results/preimage_mix/nb_updates.txt', 'r+') as file:
content = file.read()
file.seek(0, 0)
file.write(str_fw + content)
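# The "modify mixed gram matrix" block above avoids recomputing the kernel for
# every pair: since g1 and g2 are copies of Gn[idx1] and Gn[idx2], the last two
# rows and columns of the precomputed (len(Gn) + 2)-sized Gram matrix can simply
# be overwritten with the matching entries. A sketch of the same idea as a
# helper (hypothetical name):
#
# def patch_gram(km, n, idx1, idx2):
#     # make the two appended rows/columns stand for Gn[idx1] and Gn[idx2]
#     for i in range(n):
#         km[i, n] = km[n, i] = km[i, idx1]
#         km[i, n + 1] = km[n + 1, i] = km[i, idx2]
#     km[n, n] = km[idx1, idx1]
#     km[n, n + 1] = km[n + 1, n] = km[idx1, idx2]
#     km[n + 1, n + 1] = km[idx2, idx2]
#     return km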

def test_gkiam_2combination_all_pairs():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
alpha_range = np.linspace(1, 1, 1)
k = 5 # k nearest neighbors
epsilon = 1e-6
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
nb_update_mat = np.full((len(Gn), len(Gn)), np.inf)
# test on each pair of graphs.
# for idx1 in range(len(Gn) - 1, -1, -1):
# for idx2 in range(idx1, -1, -1):
for idx1 in range(187, 188):
for idx2 in range(167, 168):
g1 = Gn[idx1].copy()
g2 = Gn[idx2].copy()
# Gn[10] = []
# Gn[10] = []
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
plt.savefig("results/gk_iam/all_pairs/mutag187.png", format="PNG")
plt.show()
plt.clf()
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
plt.savefig("results/gk_iam/all_pairs/mutag167.png", format="PNG")
plt.show()
plt.clf()

###################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
#
# # write Gram matrix to file and read it.
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################
# # use only the two graphs in median set as candidates.
# Gn = [g1.copy(), g2.copy()]
# Gn_mix = Gn + [g1.copy(), g2.copy()]
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
gkernel, epsilon=epsilon,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/gk_iam/mutag' + str(idx1) + '_' + str(idx2)
+ '_alpha' + str(item) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(g_best[idx][0].nodes(data=True))
# print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)
nb_update_mat[idx1, idx2] = nb_updated_list[0]
str_fw = 'graphs %d and %d: %d.\n' % (idx1, idx2, nb_updated_list[0])
with open('results/gk_iam/all_pairs/nb_updates.txt', 'r+') as file:
content = file.read()
file.seek(0, 0)
file.write(str_fw + content)

def test_gkiam_2combination():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
alpha_range = np.linspace(0.5, 0.5, 1)
k = 20 # k nearest neighbors
epsilon = 1e-6
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
c_ei=1
c_er=1
c_es=1
# randomly select two molecules
np.random.seed(1)
idx_gi = [10, 11] # np.random.randint(0, len(Gn), 2)
g1 = Gn[idx_gi[0]].copy()
g2 = Gn[idx_gi[1]].copy()
# Gn[10] = []
# Gn[10] = []
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag10.png", format="PNG")
# plt.show()
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag11.png", format="PNG")
# plt.show()
Gn_mix = [g.copy() for g in Gn]
Gn_mix.append(g1.copy())
Gn_mix.append(g2.copy())
# compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
# write Gram matrix to file and read it.
# np.savez('results/gram_matrix.gm', gm=km, gmtime=time_km)
gmfile = np.load('results/gram_matrix.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon,
ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
print(g_best[idx][0].nodes(data=True))
print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)
def test_random_preimage_2combination():
# from gk_iam import compute_kernel
from preimage import random_preimage
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:12]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel)
# print(dis_max, dis_min, dis_mean)
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
l = 500
alpha_range = np.linspace(0, 1, 11)
k = 5 # k nearest neighbors
# randomly select two molecules
np.random.seed(1)
idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2)
g1 = Gn[idx_gi[0]].copy()
g2 = Gn[idx_gi[1]].copy()
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag10.png", format="PNG")
# plt.show()
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag11.png", format="PNG")
# plt.show()
######################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
## g_tmp = iam([g1, g2])
## nx.draw_networkx(g_tmp)
## plt.show()
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
###################################################################
idx1 = idx_gi[0]
idx2 = idx_gi[1]
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################

time_list = []
nb_updated_list = []
g_best = []
dis_ks_min_list = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat, nb_updated = random_preimage(Gn, [g1, g2], [alpha, 1 - alpha],
range(len(Gn), len(Gn) + 2), km,
k, r_max, l, gkernel)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
with_labels=True)
plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
plt.clf()
print(g_best[idx].nodes(data=True))
print(g_best[idx].edges(data=True))
# # compute the corresponding sod in graph space. (alpha range not considered.)
# sod_tmp, _ = median_distance(g_best[0], Gn_let)
# sod_gs_list.append(sod_tmp)
# sod_gs_min_list.append(np.min(sod_tmp))
# sod_ks_min_list.append(sod_ks)
# nb_updated_list.append(nb_updated)
# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)

###############################################################################
# help functions

def remove_edges(Gn):
for G in Gn:
for _, _, attrs in G.edges(data=True):
attrs.clear()
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
from gk_iam import compute_kernel
dis_mat = np.empty((len(Gn), len(Gn)))
if Kmatrix is None:
Kmatrix = compute_kernel(Gn, gkernel, True)
for i in range(len(Gn)):
for j in range(i, len(Gn)):
dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
if dis < 0:
if dis > -1e-10:
dis = 0
else:
raise ValueError('The distance is negative.')
dis_mat[i, j] = np.sqrt(dis)
dis_mat[j, i] = dis_mat[i, j]
dis_max = np.max(np.max(dis_mat))
dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
dis_mean = np.mean(np.mean(dis_mat))
return dis_mat, dis_max, dis_min, dis_mean
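# kernel_distance_matrix uses the standard kernel-induced metric: the squared
# feature-space distance between g_i and g_j is K(i, i) + K(j, j) - 2 * K(i, j),
# with tiny negative values from floating-point round-off clamped to zero.
# A two-graph sketch:
#
# import numpy as np
#
# def kernel_distance(Kmatrix, i, j):
#     d2 = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
#     return np.sqrt(max(d2, 0))  # clamp round-off noise to zero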
###############################################################################

if __name__ == '__main__':
###############################################################################
# test on the combination of two randomly chosen graphs. (the same setting as
# in the random pre-image paper.)
# test_random_preimage_2combination()
# test_gkiam_2combination()
# test_gkiam_2combination_all_pairs()
test_preimage_mix_2combination_all_pairs()

+ 88
- 20
pygraph/kernels/untilHPathKernel.py View File

@@ -51,6 +51,7 @@ def untilhpathkernel(*args,
applied for the graph kernel. The following choices are available:
'MinMax': use the MinMax kernel and counting feature map.
'tanimoto': use the Tanimoto kernel and binary feature map.
None: no sub-kernel is used, the kernel is computed directly.
compute_method : string
Computation method to store paths and compute the graph kernel. The
following choices are available:
@@ -72,14 +73,16 @@ def untilhpathkernel(*args,
Kmatrix = np.zeros((len(Gn), len(Gn)))
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
'edge_attr_dim', 'is_directed'],
node_label=node_label, edge_label=edge_label)
if not ds_attrs['node_labeled']:
for G in Gn:
nx.set_node_attributes(G, '0', 'atom')
if not ds_attrs['edge_labeled']:
for G in Gn:
nx.set_edge_attributes(G, '0', 'bond_type')
if k_func != None:
if not ds_attrs['node_labeled']:
for G in Gn:
nx.set_node_attributes(G, '0', 'atom')
if not ds_attrs['edge_labeled']:
for G in Gn:
nx.set_edge_attributes(G, '0', 'bond_type')


start_time = time.time()


@@ -93,12 +96,15 @@ def untilhpathkernel(*args,
else:
chunksize = 100
all_paths = [[] for _ in range(len(Gn))]
if compute_method == 'trie':
if compute_method == 'trie' and k_func != None:
getps_partial = partial(wrapper_find_all_path_as_trie, depth,
ds_attrs, node_label, edge_label)
else:
elif compute_method != 'trie' and k_func != None:
getps_partial = partial(wrapper_find_all_paths_until_length, depth,
ds_attrs, node_label, edge_label)
ds_attrs, node_label, edge_label, True)
else:
getps_partial = partial(wrapper_find_all_paths_until_length, depth,
ds_attrs, node_label, edge_label, False)
if verbose:
iterator = tqdm(pool.imap_unordered(getps_partial, itr, chunksize),
desc='getting paths', file=sys.stdout)
@@ -110,10 +116,12 @@ def untilhpathkernel(*args,
pool.join()
# for g in Gn:
# if compute_method == 'trie':
# if compute_method == 'trie' and k_func != None:
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label)
# else:
# elif compute_method != 'trie' and k_func != None:
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label)
# else:
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label, False)
## size = sys.getsizeof(all_paths)
## for item in all_paths:
@@ -130,20 +138,27 @@ def untilhpathkernel(*args,
## all_paths[i] = ps
## print(time.time() - ttt)
if compute_method == 'trie':
if compute_method == 'trie' and k_func != None:
def init_worker(trie_toshare):
global G_trie
G_trie = trie_toshare
do_partial = partial(wrapper_uhpath_do_trie, k_func)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose)
else:
elif compute_method != 'trie' and k_func != None:
def init_worker(plist_toshare):
global G_plist
G_plist = plist_toshare
do_partial = partial(wrapper_uhpath_do_naive, k_func)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose)
else:
def init_worker(plist_toshare):
global G_plist
G_plist = plist_toshare
do_partial = partial(wrapper_uhpath_do_kernelless, k_func)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose)
# # ---- direct running, normally use single CPU core. ----
@@ -353,12 +368,62 @@ def wrapper_uhpath_do_naive(k_func, itr):
return i, j, _untilhpathkernel_do_naive(G_plist[i], G_plist[j], k_func)




def _untilhpathkernel_do_kernelless(paths1, paths2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs naively.

Parameters
----------
paths1, paths2 : list
Lists of paths in the two graphs, where for unlabeled graphs each
path is represented by a list of nodes, while for labeled graphs each
path is represented by a string consisting of labels of nodes and/or
edges on that path.
k_func : string
A kernel function applied using different notions of fingerprint
similarity.

Return
------
kernel : float
Path kernel up to h between 2 graphs.
"""
# paths may be lists of nodes for unlabeled graphs; convert them to
# hashable tuples before building sets.
paths1 = [tuple(p) if isinstance(p, list) else p for p in paths1]
paths2 = [tuple(p) if isinstance(p, list) else p for p in paths2]
all_paths = list(set(paths1 + paths2))

if k_func == 'tanimoto':
length_union = len(set(paths1 + paths2))
kernel = (len(set(paths1)) + len(set(paths2)) -
length_union) / length_union
# vector1 = [(1 if path in paths1 else 0) for path in all_paths]
# vector2 = [(1 if path in paths2 else 0) for path in all_paths]
# kernel_uv = np.dot(vector1, vector2)
# kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)

else: # MinMax kernel
path_count1 = Counter(paths1)
path_count2 = Counter(paths2)
vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
for key in all_paths]
vector2 = [(path_count2[key] if (key in path_count2.keys()) else 0)
for key in all_paths]
kernel = np.sum(np.minimum(vector1, vector2)) / \
np.sum(np.maximum(vector1, vector2))

return kernel


def wrapper_uhpath_do_kernelless(k_func, itr):
i = itr[0]
j = itr[1]
return i, j, _untilhpathkernel_do_kernelless(G_plist[i], G_plist[j], k_func)
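# As a quick illustration of the two fingerprint similarities above (toy path
# strings, not from a real dataset): for paths1 = ['a', 'ab', 'ab'] and
# paths2 = ['ab', 'b'], the Tanimoto kernel compares the path sets,
# (2 + 2 - 3) / 3 = 1/3, while the MinMax kernel compares multiset counts,
# sum(min) / sum(max) = (0 + 1 + 0) / (1 + 2 + 1) = 1/4:
#
# paths1 = ['a', 'ab', 'ab']
# paths2 = ['ab', 'b']
# print(_untilhpathkernel_do_kernelless(paths1, paths2, 'tanimoto'))  # 0.333...
# print(_untilhpathkernel_do_kernelless(paths1, paths2, 'MinMax'))  # 0.25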


# @todo: (can be removed maybe) this method finds paths repetitively; it could be faster.
def find_all_paths_until_length(G,
length,
ds_attrs,
node_label='atom',
edge_label='bond_type'):
edge_label='bond_type',
tolabelseqs=True):
"""Find all paths no longer than a certain maximum length in a graph. A """Find all paths no longer than a certain maximum length in a graph. A
recursive depth first search is applied. recursive depth first search is applied.


@@ -398,7 +463,7 @@ def find_all_paths_until_length(G,
# path_l = path_l_new[:]


path_l = [[n] for n in G.nodes] # paths of length l
all_paths = path_l[:]
all_paths = [p.copy() for p in path_l]
for l in range(1, length + 1):
path_lplus1 = []
for path in path_l:
@@ -409,7 +474,7 @@ def find_all_paths_until_length(G,
path_lplus1.append(tmp)


all_paths += path_lplus1
path_l = path_lplus1[:]
path_l = [p.copy() for p in path_lplus1]


# for i in range(0, length + 1):
# new_paths = find_all_paths(G, i)
@@ -419,15 +484,18 @@ def find_all_paths_until_length(G,


# consider labels
# print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)
return (paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)
if tolabelseqs else all_paths)
def wrapper_find_all_paths_until_length(length, ds_attrs, node_label,
edge_label, itr_item):
edge_label, tolabelseqs, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, find_all_paths_until_length(g, length, ds_attrs,
node_label=node_label, edge_label=edge_label)
node_label=node_label, edge_label=edge_label,
tolabelseqs=tolabelseqs)




def find_all_path_as_trie(G,


+ 8
- 8
pygraph/utils/graphfiles.py View File

@@ -84,7 +84,7 @@ def loadGXL(filename):
return g




def saveGXL(graph, filename, method='gedlib-letter'):
def saveGXL(graph, filename, method='benoit'):
if method == 'benoit':
import xml.etree.ElementTree as ET
root_node = ET.Element('gxl')
@@ -131,13 +131,13 @@ def saveGXL(graph, filename, method='gedlib-letter'):
gxl_file.write("<gxl>\n") gxl_file.write("<gxl>\n")
gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"true\" edgemode=\"undirected\">\n") gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"true\" edgemode=\"undirected\">\n")
for v, attrs in graph.nodes(data=True): for v, attrs in graph.nodes(data=True):
gxl_file.write("<node id=\"_" + str(v) + "\">\n")
gxl_file.write("<attr name=\"" + "chem" + "\"><int>" + str(attrs['atom']) + "</int></attr>\n")
gxl_file.write("<node id=\"_" + str(v) + "\">")
gxl_file.write("<attr name=\"" + "chem" + "\"><int>" + str(attrs['atom']) + "</int></attr>")
gxl_file.write("</node>\n") gxl_file.write("</node>\n")
for v1, v2, attrs in graph.edges(data=True): for v1, v2, attrs in graph.edges(data=True):
gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\">\n")
# gxl_file.write("<attr name=\"valence\"><int>" + str(attrs['bond_type']) + "</int></attr>\n")
gxl_file.write("<attr name=\"valence\"><int>" + "1" + "</int></attr>\n")
gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\">")
# gxl_file.write("<attr name=\"valence\"><int>" + str(attrs['bond_type']) + "</int></attr>")
gxl_file.write("<attr name=\"valence\"><int>" + "1" + "</int></attr>")
gxl_file.write("</edge>\n") gxl_file.write("</edge>\n")
gxl_file.write("</graph>\n") gxl_file.write("</graph>\n")
gxl_file.write("</gxl>\n") gxl_file.write("</gxl>\n")
@@ -485,7 +485,7 @@ def loadDataset(filename, filename_y=None, extra_params=None):
return data, y




def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile'):
def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None):
"""Save list of graphs. """Save list of graphs.
""" """
import os import os
@@ -502,7 +502,7 @@ def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile'):
fgroup.write("\n<GraphCollection>") fgroup.write("\n<GraphCollection>")
for idx, g in enumerate(Gn): for idx, g in enumerate(Gn):
fname_tmp = "graph" + str(idx) + ".gxl" fname_tmp = "graph" + str(idx) + ".gxl"
saveGXL(g, dirname_ds + fname_tmp)
saveGXL(g, dirname_ds + fname_tmp, method=xparams['method'])
fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>")
fgroup.write("\n</GraphCollection>") fgroup.write("\n</GraphCollection>")
fgroup.close() fgroup.close()

