Update pre-image.

5 years ago · dd810b92da
--- a/preimage/fitDistance.py
+++ b/preimage/fitDistance.py
@@ -0,0 +1,103 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Wed Oct 16 14:20:06 2019
@author: ljia
 """
 import numpy as np
 from tqdm import tqdm
 import sys
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
 from ged import GED, get_nb_edit_operations
 from utils import kernel_distance_matrix
 def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
    """Fit constant GED edit costs to distances in kernel (feature) space.

    All six edit costs start at 1. In every iteration the GED of each graph
    pair (i, j) with j >= i is computed under the current costs, the numbers
    of edit operations of the returned node maps are counted, and a least
    squares problem (operation counts vs. kernel distances) yields the edit
    costs used by the next iteration.

    Parameters
    ----------
    Gn : list
        Graphs to compare.
    gkernel : str
        Kernel name forwarded to ``kernel_distance_matrix``.
    itr_max : int
        Number of fitting iterations.

    Returns
    -------
    tuple
        ``(c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list,
        edit_cost_list)``: the costs after the last iteration, the residual
        measured at each iteration *before* refitting, and the costs each
        iteration started with.
    """
    c_vi = c_vr = c_vs = c_ei = c_er = c_es = 1

    # Distances in feature space, flattened over the pairs (i, j), j >= i,
    # i.e. in the same order in which the GEDs are collected below.
    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
    nb_rows = len(dis_k_mat)
    dis_k_vec = np.array([dis_k_mat[row, col] for row in range(nb_rows)
                          for col in range(row, nb_rows)])

    residual_list = []
    edit_cost_list = []
    for itr in range(itr_max):
        print('iteration', itr)
        edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
        edit_cost_list.append(edit_cost_constant)

        # GEDs and numbers of edit operations for every pair of graphs.
        ged_all = []
        nb_ops_all = []  # one (n_vi, n_vr, n_vs, n_ei, n_er, n_es) per pair
        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
            for j in range(i, len(Gn)):
                dis, pi_forward, pi_backward = GED(
                    Gn[i], Gn[j], lib='gedlibpy', cost='CONSTANT',
                    method='IPFP', edit_cost_constant=edit_cost_constant,
                    stabilizer='min', repeat=30)
                ged_all.append(dis)
                nb_ops_all.append(get_nb_edit_operations(
                    Gn[i], Gn[j], pi_forward, pi_backward))

        # Euclidean norm of the gap between GEDs and kernel distances.
        gap = np.array(ged_all) - dis_k_vec
        residual_list.append(np.sqrt(np.sum(np.square(gap))))

        # "Fit" the GEDs to the kernel distances: least squares on the
        # (nb_pairs x 6) matrix of operation counts.
        nb_cost_mat = np.array(nb_ops_all).reshape(-1, 6)
        edit_costs = np.linalg.lstsq(nb_cost_mat, dis_k_vec, rcond=None)[0]

        # Tiny negative costs are treated as numerical noise and set to 0;
        # costs below -1e-3 are left untouched.
        edit_costs[(edit_costs < 0) & (edit_costs > -1e-3)] = 0
        c_vi, c_vr, c_vs, c_ei, c_er, c_es = edit_costs

    return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list
 if __name__ == '__main__':
    # Demo: fit the edit costs on the first 10 MUTAG graphs, after
    # remove_edges() has been applied to them.
    from utils import remove_edges

    ds = dict(name='MUTAG', dataset='../datasets/MUTAG/MUTAG_A.txt',
              extra_params={})  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    Gn = Gn[:10]
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    itr_max = 10
    (c_vi, c_vr, c_vs, c_ei, c_er, c_es,
     residual_list, edit_cost_list) = fit_GED_to_kernel_distance(
        Gn, gkernel, itr_max)
--- a/preimage/ged.py
+++ b/preimage/ged.py
@@ -0,0 +1,197 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Thu Oct 17 18:44:59 2019
@author: ljia
 """
 import numpy as np
 import networkx as nx
 from tqdm import tqdm
 import sys
 from gedlibpy import librariesImport, gedlibpy
 def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP',
         edit_cost_constant=[], saveGXL='benoit', stabilizer='min', repeat=50):
    """Compute the GED between two graphs with gedlibpy.

    The distance returned is the upper bound reported by gedlibpy.

    Parameters
    ----------
    g1, g2 : networkx graphs
        Every node must carry an ``'atom'`` attribute.
    lib : str
        GED backend; only ``'gedlibpy'`` is supported.
    cost : str
        Edit cost name passed to ``gedlibpy.set_edit_cost``.
    method : str
        Method name passed to ``gedlibpy.set_method``.
    edit_cost_constant : list
        Forwarded unchanged to ``gedlibpy.set_edit_cost`` (never mutated
        here, so the shared default list is harmless).
    saveGXL : str
        Unused; kept so existing callers keep working.
    stabilizer : None or 'min'
        ``None`` runs the method once; ``'min'`` runs it up to ``repeat``
        times and keeps the run with the smallest upper bound.
    repeat : int
        Maximum number of runs when ``stabilizer == 'min'``.

    Returns
    -------
    (dis, pi_forward, pi_backward)
        ``pi_forward[i]`` is the node of ``g2`` assigned to the i-th node of
        ``g1`` (``np.inf`` when the index returned by gedlibpy is not a node
        of ``g2``); ``pi_backward`` is the analogue from ``g2`` to ``g1``.

    Raises
    ------
    ValueError
        For an unsupported ``lib`` or ``stabilizer``, or ``repeat < 1`` with
        ``stabilizer == 'min'``. These cases previously fell through and
        failed with an unbound-variable error at the end of the function.
    """
    if lib != 'gedlibpy':
        raise ValueError("unsupported GED library %r; only 'gedlibpy' is "
                         "available" % (lib,))
    if stabilizer is not None and stabilizer != 'min':
        raise ValueError("unsupported stabilizer %r; use None or 'min'"
                         % (stabilizer,))
    if stabilizer == 'min' and repeat < 1:
        raise ValueError("repeat must be at least 1, got %r" % (repeat,))

    def convertGraph(G):
        """Convert a graph to the NetworkX format recognized by gedlibpy:
        node ids become strings, the 'atom' label is stored as 'chem' and
        edges are added without attributes.
        """
        G_new = nx.Graph()
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        for nd1, nd2 in G.edges():
            G_new.add_edge(str(nd1), str(nd2))
        return G_new

    # The order of the following calls is kept exactly as before.
    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1), "")
    gedlibpy.add_nx_graph(convertGraph(g2), "")
    listID = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
    gedlibpy.init()
    gedlibpy.set_method(method, "")
    gedlibpy.init_method()
    g = listID[0]
    h = listID[1]

    if stabilizer is None:
        gedlibpy.run_method(g, h)
        pi_forward = gedlibpy.get_forward_map(g, h)
        pi_backward = gedlibpy.get_backward_map(g, h)
        upper = gedlibpy.get_upper_bound(g, h)
    else:  # stabilizer == 'min'
        upper = np.inf
        pi_forward = pi_backward = None
        for _ in range(repeat):
            gedlibpy.run_method(g, h)
            upper_tmp = gedlibpy.get_upper_bound(g, h)
            # Always keep the first run so the maps are defined even if no
            # run ever reports a finite upper bound.
            if pi_forward is None or upper_tmp < upper:
                upper = upper_tmp
                pi_forward = gedlibpy.get_forward_map(g, h)
                pi_backward = gedlibpy.get_backward_map(g, h)
            if upper == 0:
                break
    dis = upper

    # Make the map labels correct (a removed node is labelled np.inf).
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = len(nodes1)
    nb2 = len(nodes2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
    return dis, pi_forward, pi_backward
 def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
         edit_cost_constant=[], stabilizer='min', repeat=50):
    """Compute GEDs for a group of graphs.

    The previous body was a copy of ``GED`` that referenced the undefined
    names ``g1`` and ``g2`` and therefore raised ``NameError`` on every
    call. The GED is now computed for every ordered pair of graphs by
    delegating to ``GED``. Both orders are computed because the distance
    need not be symmetric when insertion and removal costs differ.

    Parameters
    ----------
    Gn : list
        Graphs to compare pairwise.
    lib, cost, method, edit_cost_constant, stabilizer, repeat
        Forwarded unchanged to ``GED``.

    Returns
    -------
    (dis, pi_forward, pi_backward)
        ``dis`` is a ``len(Gn) x len(Gn)`` array with ``dis[i, j]`` the GED
        from ``Gn[i]`` to ``Gn[j]``; ``pi_forward[i][j]`` and
        ``pi_backward[i][j]`` are the node maps ``GED`` returned for that
        pair.

    Raises
    ------
    ValueError
        If ``lib`` is not ``'gedlibpy'``.
    """
    if lib != 'gedlibpy':
        raise ValueError("unsupported GED library %r; only 'gedlibpy' is "
                         "available" % (lib,))

    nb_graphs = len(Gn)
    dis = np.zeros((nb_graphs, nb_graphs))
    pi_forward = [[None] * nb_graphs for _ in range(nb_graphs)]
    pi_backward = [[None] * nb_graphs for _ in range(nb_graphs)]
    for i in range(nb_graphs):
        for j in range(nb_graphs):
            dis[i, j], pi_forward[i][j], pi_backward[i][j] = GED(
                Gn[i], Gn[j], lib=lib, cost=cost, method=method,
                edit_cost_constant=edit_cost_constant,
                stabilizer=stabilizer, repeat=repeat)
    return dis, pi_forward, pi_backward
 def ged_median(Gn, Gn_median, measure='ged', verbose=False,
                    ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Sum, for every graph in ``Gn``, its GEDs to all graphs in ``Gn_median``.

    ``measure`` is accepted but not used. With ``verbose`` a tqdm progress
    bar is written to stdout.

    Returns ``(dis_list, pi_forward_list)``: ``dis_list[i]`` is the sum of
    the distances from ``Gn[i]`` to every median graph, and
    ``pi_forward_list[i][k]`` is the forward node map from ``Gn[i]`` to
    ``Gn_median[k]``.
    """
    graphs = enumerate(Gn)
    if verbose:
        graphs = tqdm(graphs, desc='computing median distances',
                      file=sys.stdout)

    dis_list = []
    pi_forward_list = []
    for _, G in graphs:
        sod = 0
        forward_maps = []
        for G_p in Gn_median:
            dis_tmp, pi_tmp_forward, _ = GED(G, G_p, cost=ged_cost,
                                             method=ged_method,
                                             saveGXL=saveGXL)
            forward_maps.append(pi_tmp_forward)
            sod += dis_tmp
        pi_forward_list.append(forward_maps)
        dis_list.append(sod)
    return dis_list, pi_forward_list
 def get_nb_edit_operations(g1, g2, forward_map, backward_map):
    """Compute the number of each kind of edit operation of a node mapping.

    Parameters
    ----------
    g1, g2 : networkx graphs
        Nodes carry an ``'atom'`` label.
    forward_map : list
        ``forward_map[i]`` is the node of ``g2`` assigned to the i-th node of
        ``g1``, or ``np.inf`` if that node is removed.
    backward_map : list
        Same from ``g2`` to ``g1``; ``np.inf`` marks an inserted node.

    Returns
    -------
    (n_vi, n_vr, n_vs, n_ei, n_er, n_es)
        Numbers of node insertions, removals and substitutions, then edge
        insertions, removals and substitutions. Edge labels are not
        considered, so ``n_es`` is always 0.
    """
    # Graph.node was removed in networkx 2.4; Graph.nodes[...] is the
    # equivalent accessor available since networkx 2.0.
    nodes1 = list(g1.nodes)

    n_vr = 0
    n_vs = 0
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
            n_vs += 1

    n_vi = sum(1 for map_i in backward_map if map_i == np.inf)

    # Position of every node of g1, so each edge endpoint is looked up in
    # O(1) instead of scanning the node list.
    idx_of = {nd: idx for idx, nd in enumerate(nodes1)}
    n_er = 0
    nb_edges2_cnted = 0
    for n1, n2 in g1.edges():
        map1 = forward_map[idx_of[n1]]
        map2 = forward_map[idx_of[n2]]
        if map1 == np.inf or map2 == np.inf:
            # one of the nodes is removed, thus the edge is removed.
            n_er += 1
        elif g2.has_edge(map1, map2) or g2.has_edge(map2, map1):
            # corresponding edge is in g2. Edge label is not considered.
            nb_edges2_cnted += 1
        else:
            # corresponding nodes are in g2, however the edge is removed.
            n_er += 1

    # Edges of g2 that no edge of g1 was mapped onto have to be inserted.
    n_ei = g2.number_of_edges() - nb_edges2_cnted
    n_es = 0
    return n_vi, n_vr, n_vs, n_ei, n_er, n_es
--- a/preimage/iam.py
+++ b/preimage/iam.py
@@ -12,10 +12,10 @@ import networkx as nx
 from tqdm import tqdm
 import sys
 from gedlibpy import librariesImport, gedlibpy
 sys.path.insert(0, "../")
 from pygraph.utils.graphdataset import get_dataset_attributes
 from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
 from ged import GED, ged_median
 def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, 
@@ -237,7 +237,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
 #        # find the best graph generated in this iteration and update pi_p.
        # @todo: should we update all graphs generated or just the best ones?
        dis_list, pi_forward_list = median_distance(G_new_list, Gn_median, 
        dis_list, pi_forward_list = ged_median(G_new_list, Gn_median, 
            **params_ged)
        # @todo: should we remove the identical and connectivity check? 
        # Don't know which is faster.
@@ -362,7 +362,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
    # phase 1: initilize.
    # compute set-median.
    dis_min = np.inf
    dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median,
    dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median,
        **params_ged)
    # find all smallest distances.
    if allBestInit: # try all best init graphs.
@@ -426,96 +426,6 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
 ###############################################################################
 # Useful functions.
 def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', saveGXL='benoit',
         stabilizer='min', repeat=50):
    """Compute the GED between two graphs with gedlibpy.

    The distance returned is the upper bound reported by gedlibpy.

    Parameters
    ----------
    g1, g2 : networkx graphs
        Every node must carry an ``'atom'`` attribute.
    lib : str
        GED backend; only ``'gedlibpy'`` is supported.
    cost : str
        Edit cost name passed to ``gedlibpy.set_edit_cost``.
    method : str
        Method name passed to ``gedlibpy.set_method``.
    saveGXL : str
        Unused; kept so existing callers keep working.
    stabilizer : None or 'min'
        ``None`` runs the method once; ``'min'`` runs it up to ``repeat``
        times and keeps the run with the smallest upper bound.
    repeat : int
        Maximum number of runs when ``stabilizer == 'min'``. Defaults to 50,
        the value that used to be hard-coded.

    Returns
    -------
    (dis, pi_forward, pi_backward)
        ``pi_forward[i]`` is the node of ``g2`` assigned to the i-th node of
        ``g1`` (``np.inf`` when the index returned by gedlibpy is not a node
        of ``g2``); ``pi_backward`` is the analogue from ``g2`` to ``g1``.

    Raises
    ------
    ValueError
        For an unsupported ``lib`` or ``stabilizer``, or ``repeat < 1`` with
        ``stabilizer == 'min'``. These cases previously fell through and
        failed with an unbound-variable error at the end of the function.
    """
    if lib != 'gedlibpy':
        raise ValueError("unsupported GED library %r; only 'gedlibpy' is "
                         "available" % (lib,))
    if stabilizer is not None and stabilizer != 'min':
        raise ValueError("unsupported stabilizer %r; use None or 'min'"
                         % (stabilizer,))
    if stabilizer == 'min' and repeat < 1:
        raise ValueError("repeat must be at least 1, got %r" % (repeat,))

    def convertGraph(G):
        """Convert a graph to the NetworkX format recognized by gedlibpy:
        node ids become strings, the 'atom' label is stored as 'chem' and
        edges are added without attributes.
        """
        G_new = nx.Graph()
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        for nd1, nd2 in G.edges():
            G_new.add_edge(str(nd1), str(nd2))
        return G_new

    # The order of the following calls is kept exactly as before.
    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1), "")
    gedlibpy.add_nx_graph(convertGraph(g2), "")
    listID = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost)
    gedlibpy.init()
    gedlibpy.set_method(method, "")
    gedlibpy.init_method()
    g = listID[0]
    h = listID[1]

    if stabilizer is None:
        gedlibpy.run_method(g, h)
        pi_forward = gedlibpy.get_forward_map(g, h)
        pi_backward = gedlibpy.get_backward_map(g, h)
        upper = gedlibpy.get_upper_bound(g, h)
    else:  # stabilizer == 'min'
        upper = np.inf
        pi_forward = pi_backward = None
        for _ in range(repeat):
            gedlibpy.run_method(g, h)
            upper_tmp = gedlibpy.get_upper_bound(g, h)
            # Always keep the first run so the maps are defined even if no
            # run ever reports a finite upper bound.
            if pi_forward is None or upper_tmp < upper:
                upper = upper_tmp
                pi_forward = gedlibpy.get_forward_map(g, h)
                pi_backward = gedlibpy.get_backward_map(g, h)
            if upper == 0:
                break
    dis = upper

    # Make the map labels correct (a removed node is labelled np.inf).
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = len(nodes1)
    nb2 = len(nodes2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
    return dis, pi_forward, pi_backward
 def median_distance(Gn, Gn_median, measure='ged', verbose=False,
                    ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Sum, for every graph in ``Gn``, its GEDs to all graphs in ``Gn_median``.

    ``measure`` is accepted but not used. With ``verbose`` a tqdm progress
    bar is written to stdout.

    Returns ``(dis_list, pi_forward_list)``: ``dis_list[i]`` is the sum of
    the distances from ``Gn[i]`` to every median graph, and
    ``pi_forward_list[i][k]`` is the forward node map from ``Gn[i]`` to
    ``Gn_median[k]``.
    """
    graphs = enumerate(Gn)
    if verbose:
        graphs = tqdm(graphs, desc='computing median distances',
                      file=sys.stdout)

    dis_list = []
    pi_forward_list = []
    for _, G in graphs:
        sod = 0
        forward_maps = []
        for G_p in Gn_median:
            dis_tmp, pi_tmp_forward, _ = GED(G, G_p, cost=ged_cost,
                                             method=ged_method,
                                             saveGXL=saveGXL)
            forward_maps.append(pi_tmp_forward)
            sod += dis_tmp
        pi_forward_list.append(forward_maps)
        dis_list.append(sod)
    return dis_list, pi_forward_list
 ###############################################################################
 # Old implementations.
--- a/preimage/preimage_iam.py
+++ b/preimage/preimage_iam.py
@@ -13,20 +13,13 @@ and the iterative alternate minimizations (IAM) in reference [2].
 """
 import sys
 import numpy as np
 import multiprocessing
 from tqdm import tqdm
 import networkx as nx
 import matplotlib.pyplot as plt
 import random
 from iam import iam_upgraded
 sys.path.insert(0, "../")
 from pygraph.kernels.marginalizedKernel import marginalizedkernel
 from pygraph.kernels.untilHPathKernel import untilhpathkernel
 from pygraph.kernels.spKernel import spkernel
 import functools
 from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
 from pygraph.kernels.structuralspKernel import structuralspkernel
 from utils import dis_gstar, compute_kernel
 def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, 
@@ -72,13 +65,13 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
 #        print(g.nodes(data=True))
 #        print(g.edges(data=True))
    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
    for gi in Gk:
        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
 #        nx.draw_networkx(gi)
        plt.show()
 #        draw_Letter_graph(g)
        print(gi.nodes(data=True))
        print(gi.edges(data=True))
 #    for gi in Gk:
 #        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
 ##        nx.draw_networkx(gi)
 #        plt.show()
 ##        draw_Letter_graph(g)
 #        print(gi.nodes(data=True))
 #        print(gi.edges(data=True))
 #    i = 1
    r = 0
@@ -173,7 +166,7 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
        print('\nthe k shortest distances are', dis_k)
        print('the shortest distances for previous iterations are', dis_of_each_itr)
    print('\nthe graph is updated', nb_updated, 'times.')
    print('\n\nthe graph is updated', nb_updated, 'times.')
    print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
    print('distances in kernel space:', dis_of_each_itr, '\n')
@@ -227,13 +220,13 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
 #        print(g.nodes(data=True))
 #        print(g.edges(data=True))
    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
    for gi in Gk:
        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
 #        nx.draw_networkx(gi)
        plt.show()
 #        draw_Letter_graph(g)
        print(gi.nodes(data=True))
        print(gi.edges(data=True))
 #    for gi in Gk:
 #        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
 ##        nx.draw_networkx(gi)
 #        plt.show()
 ##        draw_Letter_graph(g)
 #        print(gi.nodes(data=True))
 #        print(gi.edges(data=True))
    r = 0
    itr_total = 0
@@ -394,7 +387,8 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
                    # compute distance between \psi and the new generated graph.
                    knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
                    dhat_new = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
                    dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1), 
                                         alpha, knew, withterm3=False)
                    # @todo: the new distance is smaller or also equal?
                    if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
                        # check if the new distance is the same as one in D_k.
@@ -448,7 +442,7 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
        print('\nthe k shortest distances are', dis_k)
        print('the shortest distances for previous iterations are', dis_of_each_itr)
    print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
    print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
          nb_updated_random, 'times.')
    print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam, 
          'times, and by random generation', nb_updated_k_random, 'times.')
@@ -459,60 +453,6 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
 ###############################################################################
 # useful functions.
 def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    """Return sqrt(term1 - term2 + term3) computed from a Gram matrix.

    term1 is ``Kmatrix[idx_g, idx_g]``; term2 is twice the ``alpha``-weighted
    sum of ``Kmatrix[idx_g, idx_gi[i]]``. When ``withterm3`` is False, the
    double sum of ``alpha[i1] * alpha[i2] * Kmatrix[idx_gi[i1], idx_gi[i2]]``
    is added to the passed-in ``term3`` (0 by default); otherwise ``term3``
    is used as given.
    """
    k_self = Kmatrix[idx_g, idx_g]

    k_cross = 0
    for pos, weight in enumerate(alpha):
        k_cross = k_cross + weight * Kmatrix[idx_g, idx_gi[pos]]
    k_cross = 2 * k_cross

    if withterm3 == False:
        weighted = list(enumerate(alpha))
        for pos1, weight1 in weighted:
            for pos2, weight2 in weighted:
                term3 = term3 + (weight1 * weight2
                                 * Kmatrix[idx_gi[pos1], idx_gi[pos2]])

    return np.sqrt(k_self - k_cross + term3)
 def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` for a named graph kernel.

    Parameters
    ----------
    Gn : list
        Graphs; the kernels are called with ``node_label='atom'``.
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    Kmatrix
        The Gram matrix with every entry divided by
        ``sqrt(K[i, i] * K[j, j])``; it is normalized in place.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the names above. This previously
        failed later with an unbound ``Kmatrix``.
    """
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                  p_quit=0.03, n_iteration=10, remove_totters=False,
                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                  depth=10, k_func='MinMax', compute_method='trie',
                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
                              {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                              n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
                              {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                              n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    else:
        raise ValueError('unknown graph kernel: %r' % (graph_kernel,))

    # Normalization. The diagonal is copied first because the loop below
    # overwrites it; the lower triangle is mirrored from the upper one.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    nb_graphs = len(Kmatrix)
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix
 def gram2distances(Kmatrix):
    """Turn a Gram matrix into the matrix of distances it induces.

    Entry (i, j) of the result is
    ``sqrt(Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j])``.
    """
    size = len(Kmatrix)
    sq_dists = np.zeros((size, size))
    for row, col in np.ndindex(size, size):
        sq_dists[row, col] = (Kmatrix[row, row] + Kmatrix[col, col]
                              - 2 * Kmatrix[row, col])
    return np.sqrt(sq_dists)
 ###############################################################################
 # Old implementations.
 #def gk_iam(Gn, alpha):
--- a/preimage/preimage_random.py
+++ b/preimage/preimage_random.py
@@ -10,51 +10,14 @@ pre-image
 import sys
 import numpy as np
 import random
 import multiprocessing
 from tqdm import tqdm
 import networkx as nx
 import matplotlib.pyplot as plt
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
 from pygraph.kernels.marginalizedKernel import marginalizedkernel
 from pygraph.kernels.untilHPathKernel import untilhpathkernel
 from pygraph.kernels.spKernel import spkernel
 import functools
 from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
 from pygraph.kernels.structuralspKernel import structuralspkernel
 from gk_iam import dis_gstar
 def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` for a named graph kernel.

    Parameters
    ----------
    Gn : list
        Graphs; the kernels are called with ``node_label='atom'``.
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    Kmatrix
        The Gram matrix with every entry divided by
        ``sqrt(K[i, i] * K[j, j])``; it is normalized in place.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the names above. This previously
        failed later with an unbound ``Kmatrix``.
    """
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                  p_quit=0.03, n_iteration=10, remove_totters=False,
                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                  depth=10, k_func='MinMax', compute_method='trie',
                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
                              {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                              n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
                              {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                              n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    else:
        raise ValueError('unknown graph kernel: %r' % (graph_kernel,))

    # Normalization. The diagonal is copied first because the loop below
    # overwrites it; the lower triangle is mirrored from the upper one.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    nb_graphs = len(Kmatrix)
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix
 from utils import compute_kernel, dis_gstar
 def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
@@ -105,6 +68,7 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
    r = 0
 #    sod_list = [dhat]
 #    found = False
    dis_of_each_itr = [dhat]
    nb_updated = 0
    g_best = []
    while r < r_max:
@@ -162,7 +126,8 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
 #                               p_quit=lmbda, n_iteration=20, remove_totters=False,
 #                               n_jobs=multiprocessing.cpu_count(), verbose=False)
                knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
                dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
                dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew, 
                                 withterm3=False)
                if dnew <= dhat: # @todo: the new distance is smaller or also equal?
                    if dnew < dhat:
                        print('\nI am smaller!')
@@ -184,13 +149,19 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
            dihat_list = [dhat]
        else:
            r += 1
        dis_of_each_itr.append(dhat)
        print('the shortest distances for previous iterations are', dis_of_each_itr)
 #    dis_best.append(dhat)
    g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])  
    g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
    print('distances in kernel space:', dis_of_each_itr, '\n')
    return dhat, g_best, nb_updated
 #    return 0, 0, 0
 if __name__ == '__main__':
    from pygraph.utils.graphfiles import loadDataset
 #    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
 #          'extra_params': {}}  # node/edge symb
--- a/preimage/test.py
+++ b/preimage/test.py
@@ -80,5 +80,6 @@ def testNxGrapĥ():
    print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
    print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))
 #test()
 init() 
 #testNxGrapĥ()
--- a/preimage/test_iam.py
+++ b/preimage/test_iam.py
@@ -0,0 +1,167 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Thu Sep  5 15:59:00 2019
@author: ljia
 """
 import numpy as np
 import networkx as nx
 import matplotlib.pyplot as plt
 import time
 import random
 #from tqdm import tqdm
 #import os
 import sys
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
 from iam import iam_upgraded
 from utils import remove_edges, compute_kernel, get_same_item_indices
 from ged import ged_median
 ###############################################################################
 # tests on different numbers of median-sets.
 def test_iam_median_nb():
    """Run IAM on MUTAG for different numbers of median graphs.

    For every size in ``nb_median_range`` a random subset (fixed seed) of the
    graphs labelled 1 is used both as median set and as candidate set of
    ``iam_upgraded``. The running time, the distance returned by IAM and the
    sum of GEDs (SOD) from the first returned graph to the median set are
    printed, and that graph is drawn and saved under
    ``results/preimage_iam/``.

    Fixes over the previous version, which raised ``NameError`` right after
    the first ``iam_upgraded`` call:

    * the undefined names ``dhat`` and ``ghat_list`` are replaced by the
      values actually returned by ``iam_upgraded`` (``dis_min`` and
      ``ghat_new_list``);
    * the bookkeeping of ``nb_updated`` / ``nb_updated_k`` is dropped,
      because ``iam_upgraded`` returns no such counters here;
    * the figure is saved before ``plt.show()``; saving afterwards can
      write an empty image.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False

    # numbers of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        Gn_candidate = [g.copy() for g in Gn_median]

        ###################################################################
        # The Gram matrix of the positive graphs is read from a file that
        # must already exist (an .npz archive with keys 'gm' and 'gmtime').
        # NOTE(review): km, time_km and alpha_range are built here but not
        # used by the rest of this function - confirm whether they are
        # still needed.
        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
        # modify mixed gram matrix.
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
        ###################################################################
        alpha_range = [1 / nb_median] * nb_median

        time0 = time.time()
        ghat_new_list, dis_min = iam_upgraded(Gn_median, Gn_candidate,
            c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
            epsilon=epsilon_iam, removeNodes=removeNodes,
            connected=connected_iam,
            params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                        'saveGXL': saveGXL})
        time_total = time.time() - time0
        print('\ntime: ', time_total)
        time_list.append(time_total)

        # NOTE(review): dis_min is the distance returned by iam_upgraded;
        # the original labels are kept, but confirm that it really is a
        # distance in kernel space.
        print('\nsmallest distance in kernel space: ', dis_min)
        dis_ks_min_list.append(dis_min)
        g_best.append(ghat_new_list)

        # show the best graph and save it to file.
        print('the shortest distance is', dis_min)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_new_list[0],
                labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
                with_labels=True)
        # Save first: after plt.show() the figure may already be closed.
        plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.show()
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_new_list[0]], Gn_median,
                                ged_cost=ged_cost, ged_method=ged_method,
                                saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\ntimes:', time_list)
 ###############################################################################
 if __name__ == '__main__':
 ###############################################################################
 # Script entry point: run the IAM test on different numbers of median graphs.
    test_iam_median_nb()
--- a/preimage/test_others.py
+++ b/preimage/test_others.py
@@ -15,6 +15,9 @@ import sys
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
 from median import draw_Letter_graph
 from ged import GED, ged_median
 from utils import get_same_item_indices, compute_kernel, gram2distances, \
    dis_gstar, remove_edges
 # --------------------------- These are tests --------------------------------#
@@ -47,7 +50,6 @@ def test_who_is_the_closest_in_kernel_space(Gn):
 def test_who_is_the_closest_in_GED_space(Gn):
    from iam import GED
    idx_gi = [0, 6]
    g1 = Gn[idx_gi[0]]
    g2 = Gn[idx_gi[1]]
@@ -142,7 +144,7 @@ def test_new_IAM_allGraph_deleteNodes(Gn):
 def test_the_simple_two(Gn, gkernel):
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    from gk_iam import gk_iam_nearest_multi
    lmbda = 0.03 # termination probalility
    r_max = 10 # recursions
    l = 500
@@ -199,7 +201,7 @@ def test_the_simple_two(Gn, gkernel):
 def test_remove_bests(Gn, gkernel):
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    from gk_iam import gk_iam_nearest_multi
    lmbda = 0.03 # termination probalility
    r_max = 10 # recursions
    l = 500
@@ -249,8 +251,7 @@ def test_remove_bests(Gn, gkernel):
 # Tests on dataset Letter-H.
 def test_gkiam_letter_h():
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    from iam import median_distance
    from gk_iam import gk_iam_nearest_multi
    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
          'extra_params': {}} # node nsymb
 #    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -305,7 +306,7 @@ def test_gkiam_letter_h():
                print(g.edges(data=True))
        # compute the corresponding sod in graph space. (alpha range not considered.)
        sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER', 
        sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER', 
                                     ged_method='IPFP', saveGXL='gedlib-letter')
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
@@ -318,19 +319,6 @@ def test_gkiam_letter_h():
    print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) 
    print('\nnumber of updates for each letter: ', nb_updated_list)             
    print('\ntimes:', time_list)
 def get_same_item_indices(ls):
    """Group the positions of equal items of a list.

    Returns a dict that maps every distinct item of ``ls`` to the list of
    indices (in increasing order) at which that item occurs. Items must be
    hashable because they are used as dict keys.
    """
    positions = {}
    for pos, value in enumerate(ls):
        # setdefault creates the index list the first time a value is seen.
        positions.setdefault(value, []).append(pos)
    return positions
 #def compute_letter_median_by_average(Gn):
 #    return g_median
@@ -338,7 +326,6 @@ def get_same_item_indices(ls):
 def test_iam_letter_h():
    from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
    from gk_iam import dis_gstar, compute_kernel
    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
          'extra_params': {}} # node nsymb
 #    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -402,7 +389,7 @@ def test_iam_letter_h():
 def test_random_preimage_letter_h():
    from preimage_random import preimage_random, compute_kernel
    from preimage_random import preimage_random
    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
          'extra_params': {}} # node nsymb
 #    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -463,7 +450,7 @@ def test_random_preimage_letter_h():
                print(g.edges(data=True))
        # compute the corresponding sod in graph space. (alpha range not considered.)
        sod_tmp, _ = median_distance(g_best[0], Gn_let)
        sod_tmp, _ = ged_median(g_best[0], Gn_let)
        sod_list.append(sod_tmp)
        sod_min_list.append(np.min(sod_tmp))
@@ -479,8 +466,7 @@ def test_random_preimage_letter_h():
 def test_gkiam_mutag():
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    from iam import median_distance
    from gk_iam import gk_iam_nearest_multi
    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
          'extra_params': {}} # node nsymb
 #    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -535,7 +521,7 @@ def test_gkiam_mutag():
                print(g.edges(data=True))
        # compute the corresponding sod in graph space. (alpha range not considered.)
        sod_tmp, _ = median_distance(g_best[0], Gn_let)
        sod_tmp, _ = ged_median(g_best[0], Gn_let)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        sod_ks_min_list.append(sod_ks)
@@ -553,9 +539,7 @@ def test_gkiam_mutag():
 # Re-test.
 def retest_the_simple_two():
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    from iam import median_distance
    from test_random_mutag import remove_edges
    from gk_iam import gk_iam_nearest_multi
    # The two simple graphs.
 #    g1 = nx.Graph(name='haha')
@@ -653,7 +637,7 @@ def retest_the_simple_two():
    # compute the corresponding sod in graph space.
    for idx, item in enumerate(alpha_range):
        sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, 
        sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost, 
                                     ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
--- a/preimage/test_preimage_iam.py
+++ b/preimage/test_preimage_iam.py
@@ -10,20 +10,23 @@ import numpy as np
 import networkx as nx
 import matplotlib.pyplot as plt
 import time
 from tqdm import tqdm
 import random
 #from tqdm import tqdm
 import os
 #import os
 import sys
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
 from utils import remove_edges, compute_kernel, get_same_item_indices
 from ged import ged_median
 from preimage_iam import preimage_iam 
 ###############################################################################
 # test on the combination of the two randomly chosen graphs. (the same as in the
 # random pre-image paper.)
 # tests on different values on grid of median-sets and k.
 def test_preimage_mix_2combination_all_pairs():
    from preimage_iam import preimage_iam_random_mix, compute_kernel
    from iam import median_distance
 def test_preimage_iam_grid_k_median_nb():       
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -32,13 +35,11 @@ def test_preimage_mix_2combination_all_pairs():
    gkernel = 'marginalizedkernel'
    lmbda = 0.03 # termination probalility
    r_max = 10 # iteration limit for pre-image.
    l_max = 500 # update limit for random generation
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 5 # k nearest neighbors
    r_max = 5 # iteration limit for pre-image.
 #    alpha_range = np.linspace(0.5, 0.5, 1)
 #    k = 5 # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    ged_cost='CHEM_1'
    ged_method='IPFP'
@@ -52,153 +53,280 @@ def test_preimage_mix_2combination_all_pairs():
    removeNodes = True
    connected_iam = False
    nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
    nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
    # test on each pair of graphs.
 #    for idx1 in range(len(Gn) - 1, -1, -1):
 #        for idx2 in range(idx1, -1, -1):
    for idx1 in range(187, 188):
        for idx2 in range(167, 168):
            g1 = Gn[idx1].copy()
            g2 = Gn[idx2].copy()
        #    Gn[10] = []
        #    Gn[10] = []
    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # number of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
 #    # compute Gram matrix.
 #    time0 = time.time()
 #    km = compute_kernel(Gn, gkernel, True)
 #    time_km = time.time() - time0    
 #    # write Gram matrix to file.
 #    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    nb_updated_k_list = []
    g_best = []
    for idx_nb, nb_median in enumerate(nb_median_range):
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
 #        for g in Gn_median:
 #            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
 ##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
 #            plt.show()
 #            plt.clf()                         
        ###################################################################
        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
        # modify mixed gram matrix.
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
        ###################################################################
        alpha_range = [1 / nb_median] * nb_median
        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list.append([])
        nb_updated_k_list.append([])
        g_best.append([])   
        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
                preimage_iam(Gn, Gn_median,
                alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, 
                gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
                params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
                            'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                            'removeNodes': removeNodes, 'connected': connected_iam},
                params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
                            'saveGXL': saveGXL})
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[idx_nb].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat) 
            dis_ks_min_list[idx_nb].append(dhat)
            g_best[idx_nb].append(ghat_list)
            print('\nnumber of updates of the best graph by IAM: ', nb_updated)
            nb_updated_list[idx_nb].append(nb_updated)
            print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k)
            nb_updated_k_list[idx_nb].append(nb_updated_k)
            nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
            plt.savefig("results/preimage_mix/mutag187.png", format="PNG")
            plt.show()
            plt.clf()
            nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
            plt.savefig("results/preimage_mix/mutag167.png", format="PNG")
            plt.show()
            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), 
                    with_labels=True)
            plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + 
                        '_k' + str(k) + '.png', format="PNG")
    #        plt.show()
            plt.clf()
    #        print(ghat_list[0].nodes(data=True))
    #        print(ghat_list[0].edges(data=True))
            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, 
                                         ged_method=ged_method, saveGXL=saveGXL)
            sod_gs_list[idx_nb].append(sod_tmp)
            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ', 
          sod_gs_min_list)  
    print('\nsmallest distance in kernel space for each set of median graphs and k: ', 
          dis_ks_min_list) 
    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', 
          nb_updated_list)
    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', 
          nb_updated_k_list)
    print('\ntimes:', time_list)
            ###################################################################            
 #            Gn_mix = [g.copy() for g in Gn]
 #            Gn_mix.append(g1.copy())
 #            Gn_mix.append(g2.copy())
 #            
 #            # compute
 #            time0 = time.time()
 #            km = compute_kernel(Gn_mix, gkernel, True)
 #            time_km = time.time() - time0
 #            
 #            # write Gram matrix to file and read it.
 #            np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
            ###################################################################
            gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
            km = gmfile['gm']
            time_km = gmfile['gmtime']
            # modify mixed gram matrix.
            for i in range(len(Gn)):
                km[i, len(Gn)] = km[i, idx1]
                km[i, len(Gn) + 1] = km[i, idx2]
                km[len(Gn), i] = km[i, idx1]
                km[len(Gn) + 1, i] = km[i, idx2]
            km[len(Gn), len(Gn)] = km[idx1, idx1]
            km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
            km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
            km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
            ###################################################################
 #            # use only the two graphs in median set as candidates.
 #            Gn = [g1.copy(), g2.copy()]
 #            Gn_mix = Gn + [g1.copy(), g2.copy()]
 #            # compute         
 #            time0 = time.time()
 #            km = compute_kernel(Gn_mix, gkernel, True)
 #            time_km = time.time() - time0
 ###############################################################################
 # tests on different numbers of median-sets.
 def test_preimage_iam_median_nb():
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
 #    Gn = Gn[0:50]
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
            time_list = []
            dis_ks_min_list = []
            sod_gs_list = []
            sod_gs_min_list = []
            nb_updated_list_iam = []
            nb_updated_list_random = []
            nb_updated_k_list_iam = []
            nb_updated_k_list_random = []
            g_best = []
            # for each alpha
            for alpha in alpha_range:
                print('\n-------------------------------------------------------\n')
                print('alpha =', alpha)
                time0 = time.time()
                dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
                    nb_updated_k_iam, nb_updated_k_random = \
                    preimage_iam_random_mix(Gn, [g1, g2],
                    [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, 
                    l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, 
                    InitRandomWithAllDk=InitRandomWithAllDk,
                    params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
                                'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                                'removeNodes': removeNodes, 'connected': connected_iam},
                    params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
                                'saveGXL': saveGXL})
                time_total = time.time() - time0 + time_km
                print('time: ', time_total)
                time_list.append(time_total)
                dis_ks_min_list.append(dhat)
                g_best.append(ghat_list)
                nb_updated_list_iam.append(nb_updated_iam)       
                nb_updated_list_random.append(nb_updated_random)
                nb_updated_k_list_iam.append(nb_updated_k_iam)       
                nb_updated_k_list_random.append(nb_updated_k_random) 
            # show best graphs and save them to file.
            for idx, item in enumerate(alpha_range):
                print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
                print('one of the possible corresponding pre-images is')
                nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), 
                        with_labels=True)
                plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) 
                            + '_alpha' + str(item) + '.png', format="PNG")
 #                plt.show()
                plt.clf()
 #                print(g_best[idx][0].nodes(data=True))
 #                print(g_best[idx][0].edges(data=True))
        #        for g in g_best[idx]:
        #            draw_Letter_graph(g, savepath='results/gk_iam/')
        ##            nx.draw_networkx(g)
        ##            plt.show()
        #            print(g.nodes(data=True))
        #            print(g.edges(data=True))
    lmbda = 0.03 # termination probalility
    r_max = 10 # iteration limit for pre-image.
 #    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 5 # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    # parameters for GED function
    ged_cost='CHEM_1'
    ged_method='IPFP'
    saveGXL='gedlib'
    # parameters for IAM function
    c_ei=1
    c_er=1
    c_es=1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False
    # number of graphs; we what to compute the median of these graphs. 
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
 #    # compute Gram matrix.
 #    time0 = time.time()
 #    km = compute_kernel(Gn, gkernel, True)
 #    time_km = time.time() - time0    
 #    # write Gram matrix to file.
 #    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    nb_updated_k_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
 #        for g in Gn_median:
 #            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
 ##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
 #            plt.show()
 #            plt.clf()                         
            # compute the corresponding sod in graph space.
            for idx, item in enumerate(alpha_range):
                sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, 
                                             ged_method=ged_method, saveGXL=saveGXL)
                sod_gs_list.append(sod_tmp)
                sod_gs_min_list.append(np.min(sod_tmp))
        ###################################################################
        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
        # modify mixed gram matrix.
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
            print('\nsods in graph space: ', sod_gs_list)
            print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)  
            print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) 
            print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
            print('\nnumber of updates of the best graph for each alpha by random generation: ', 
                  nb_updated_list_random)
            print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', 
                  nb_updated_k_list_iam)
            print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', 
                  nb_updated_k_list_random)
            print('\ntimes:', time_list)
            nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
            nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
        ###################################################################
        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
            preimage_iam(Gn, Gn_median,
            alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, 
            gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
            params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
                        'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                        'removeNodes': removeNodes, 'connected': connected_iam},
            params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
                        'saveGXL': saveGXL})
            str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
                % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
            with open('results/preimage_mix/nb_updates.txt', 'r+') as file:
                content = file.read()
                file.seek(0, 0)
                file.write(str_fw + content)
        time_total = time.time() - time0 + time_km
        print('\ntime: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat) 
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        print('\nnumber of updates of the best graph: ', nb_updated)
        nb_updated_list.append(nb_updated)
        print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
        nb_updated_k_list.append(nb_updated_k)
        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), 
                with_labels=True)
 #        plt.show()
        plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + 
                    '.png', format="PNG")
        plt.clf()
 #        print(ghat_list[0].nodes(data=True))
 #        print(ghat_list[0].edges(data=True))
        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, 
                                     ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)  
    print('\nsmallest distance in kernel space for each set of median graphs: ', 
          dis_ks_min_list) 
    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', 
          nb_updated_list)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', 
          nb_updated_k_list)
    print('\ntimes:', time_list)
 ###############################################################################
 # test on the combination of the two randomly chosen graphs. (the same as in the
 # random pre-image paper.)
 def test_gkiam_2combination_all_pairs():
    from preimage_iam import preimage_iam, compute_kernel
    from iam import median_distance
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -334,7 +462,7 @@ def test_gkiam_2combination_all_pairs():
            # compute the corresponding sod in graph space.
            for idx, item in enumerate(alpha_range):
                sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, 
                sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, 
                                             ged_method=ged_method, saveGXL=saveGXL)
                sod_gs_list.append(sod_tmp)
                sod_gs_min_list.append(np.min(sod_tmp))
@@ -358,8 +486,7 @@ def test_gkiam_2combination_all_pairs():
 def test_gkiam_2combination():
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    from iam import median_distance
    from gk_iam import gk_iam_nearest_multi
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -451,7 +578,7 @@ def test_gkiam_2combination():
    # compute the corresponding sod in graph space.
    for idx, item in enumerate(alpha_range):
        sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, 
        sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, 
                                     ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
@@ -463,148 +590,6 @@ def test_gkiam_2combination():
    print('\ntimes:', time_list)
 def test_random_preimage_2combination():
    """Run the random pre-image search on a weighted pair of MUTAG graphs.

    Loads the MUTAG dataset, passes the graphs through ``remove_edges`` and,
    for each of 11 evenly spaced weights ``alpha`` in [0, 1], calls
    ``preimage_random`` on the graphs with indices 187 and 167 weighted by
    ``[alpha, 1 - alpha]``. The Gram matrix is read from a file on disk.
    For every alpha the returned graph is drawn, saved as a PNG under
    ``results/random_preimage/``, shown and printed; summary lists are
    printed at the end. Nothing is returned.
    """
    from preimage_random import preimage_random

    dataset = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
               'extra_params': {}}  # node/edge symb
    Gn, _ = loadDataset(dataset['dataset'],
                        extra_params=dataset['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    lmbda = 0.03  # termination probability (not referenced below)
    r_max = 10  # iteration limit for pre-image.
    l_max = 500  # forwarded positionally to preimage_random after r_max
    alpha_range = np.linspace(0, 1, 11)
    k = 5  # k nearest neighbors

    # The two molecules are fixed by index. np.random.seed is still called,
    # so NumPy's global RNG is reset as a side effect.
    np.random.seed(1)
    idx_gi = [187, 167]
    g1, g2 = (Gn[idx].copy() for idx in idx_gi)

    ###################################################################
    # Load the Gram matrix from file.
    # NOTE(review): the file is assumed to hold a matrix with two extra
    # rows/columns beyond len(Gn) (earlier revisions built it with
    # compute_kernel on Gn + [g1, g2]) - TODO confirm.
    gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
    km = gmfile['gm']
    time_km = gmfile['gmtime']

    # modify mixed gram matrix: the two trailing rows/columns are filled
    # with the kernel values of the two chosen graphs.
    n_graphs = len(Gn)
    for offset, src in enumerate(idx_gi):
        extra = n_graphs + offset
        for row in range(n_graphs):
            km[row, extra] = km[row, src]
            km[extra, row] = km[row, src]
    for offset_a, src_a in enumerate(idx_gi):
        for offset_b, src_b in enumerate(idx_gi):
            km[n_graphs + offset_a, n_graphs + offset_b] = km[src_a, src_b]
    ###################################################################

    time_list = []
    nb_updated_list = []
    g_best = []
    dis_ks_min_list = []

    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        started = time.time()
        dhat, ghat, nb_updated = preimage_random(
            Gn, [g1, g2], [alpha, 1 - alpha],
            range(len(Gn), len(Gn) + 2), km, k, r_max, l_max, gkernel)
        # the time stored with the Gram matrix is added to the search time.
        time_total = time.time() - started + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for alpha, dis_min, graph in zip(alpha_range, dis_ks_min_list, g_best):
        print('when alpha is', alpha, 'the shortest distance is', dis_min)
        print('one of the possible corresponding pre-images is')
        atom_labels = nx.get_node_attributes(graph, 'atom')
        nx.draw(graph, labels=atom_labels, with_labels=True)
        plt.savefig('results/random_preimage/mutag_alpha' + str(alpha) + '.png',
                    format="PNG")
        plt.show()
        plt.clf()
        print(graph.nodes(data=True))
        print(graph.edges(data=True))

    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
 ###############################################################################
 # help functions
 def remove_edges(Gn):
    """Clear the attribute dict of every edge of every graph in ``Gn``.

    Despite the name, no edge is removed: each graph's
    ``edges(data=True)`` is iterated and the attribute dict of each edge is
    emptied in place. Returns None.
    """
    for graph in Gn:
        for _src, _dst, edge_attrs in graph.edges(data=True):
            edge_attrs.clear()
 def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
    """Compute the pairwise distances between graphs in kernel space.

    The distance between graphs ``i`` and ``j`` is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])`` where ``K`` is the Gram
    matrix. Only the upper triangle of ``K`` is read; the result is
    mirrored to the lower triangle.

    Parameters
    ----------
    Gn : list
        The graphs. When ``Kmatrix`` is given only ``len(Gn)`` is used.
    Kmatrix : array, optional
        Precomputed Gram matrix supporting ``Kmatrix[i, j]`` indexing.
        When None, it is computed with
        ``compute_kernel(Gn, gkernel, True)`` from ``gk_iam``.
    gkernel : optional
        Kernel identifier forwarded to ``compute_kernel``; only used when
        ``Kmatrix`` is None.

    Returns
    -------
    tuple
        ``(dis_mat, dis_max, dis_min, dis_mean)``: the distance matrix, its
        maximum, its smallest non-zero entry and its mean.

    Raises
    ------
    ValueError
        If a squared distance is below -1e-10. NumPy also raises ValueError
        when the matrix has no non-zero entry to take the minimum of.
    """
    # ``is None`` (not ``== None``): comparing a NumPy array with ``==``
    # yields an array whose truth value is ambiguous.
    if Kmatrix is None:
        # imported lazily so a precomputed Gram matrix needs no gk_iam.
        from gk_iam import compute_kernel
        Kmatrix = compute_kernel(Gn, gkernel, True)

    nb_graphs = len(Gn)
    dis_mat = np.empty((nb_graphs, nb_graphs))
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
            if dis < 0:
                # tolerate tiny negative values caused by rounding errors.
                if dis > -1e-10:
                    dis = 0
                else:
                    raise ValueError('The distance is negative.')
            dis_mat[i, j] = np.sqrt(dis)
            dis_mat[j, i] = dis_mat[i, j]

    dis_max = np.max(dis_mat)
    dis_min = np.min(dis_mat[dis_mat != 0])
    dis_mean = np.mean(dis_mat)
    return dis_mat, dis_max, dis_min, dis_mean
 ###############################################################################
@@ -612,7 +597,13 @@ if __name__ == '__main__':
 ###############################################################################
 # test on the combination of the two randomly chosen graphs. (the same as in the
 # random pre-image paper.)
 #    test_random_preimage_2combination()
 #    test_gkiam_2combination()
 #    test_gkiam_2combination_all_pairs()
    test_preimage_mix_2combination_all_pairs()
 ###############################################################################
 # tests on different numbers of median-sets.
    test_preimage_iam_median_nb()
 ###############################################################################
 # tests on different values on grid of median-sets and k.
 #    test_preimage_iam_grid_k_median_nb()
--- a/preimage/test_preimage_mix.py
+++ b/preimage/test_preimage_mix.py
@@ -0,0 +1,542 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Thu Sep  5 15:59:00 2019
@author: ljia
 """
 import numpy as np
 import networkx as nx
 import matplotlib.pyplot as plt
 import time
 import random
 #from tqdm import tqdm
 #import os
 import sys
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
 from ged import ged_median
 from utils import compute_kernel, get_same_item_indices, remove_edges
 from preimage_iam import preimage_iam_random_mix
 ###############################################################################
 # tests on different values on grid of median-sets and k.
 def test_preimage_mix_grid_k_median_nb():
    """Run the mixed IAM / random pre-image experiment on a grid of median-set
    sizes and numbers of nearest neighbors ``k``.

    The MUTAG graphs are loaded, passed through ``remove_edges`` and
    restricted to the indices that ``get_same_item_indices(y_all)`` lists
    under key 1. For every size in ``nb_median_range`` a median set is sampled
    (the random seed is reset to 1 before each draw), the pre-computed Gram
    matrix is extended with rows/columns for the sampled graphs, and
    ``preimage_iam_random_mix`` is run for every ``k`` in ``k_range``. For each
    run the first returned graph is drawn to ``results/preimage_mix/`` and its
    ``ged_median`` value against the median set is recorded. All collected
    statistics are printed; nothing is returned.

    Requires ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``) and the ``results/preimage_mix/`` folder.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 5 # iteration limit for pre-image.
    l_max = 500 # update limit for random generation
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    ged_cost='CHEM_1'
    ged_method='IPFP'
    saveGXL='gedlib'
    # parameters for IAM function
    c_ei=1
    c_er=1
    c_es=1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False
    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # number of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    # The Gram matrix file was produced beforehand with:
    #    time0 = time.time()
    #    km = compute_kernel(Gn, gkernel, True)
    #    time_km = time.time() - time0
    #    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
    # It does not depend on the median set, so it is read once; the context
    # manager closes the .npz archive instead of leaking its file handle.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list_iam = []
    nb_updated_list_random = []
    nb_updated_k_list_iam = []
    nb_updated_k_list_random = []
    g_best = []
    for idx_nb, nb_median in enumerate(nb_median_range):
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # same seed for every size, so the draws are reproducible.
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        ###################################################################
        # build the mixed Gram matrix: the first len(Gn) rows/columns are the
        # candidate graphs, the last nb_median ones repeat the median graphs.
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
        ###################################################################
        # every median graph gets the same weight.
        alpha_range = [1 / nb_median] * nb_median
        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list_iam.append([])
        nb_updated_list_random.append([])
        nb_updated_k_list_iam.append([])
        nb_updated_k_list_random.append([])
        g_best.append([])
        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
                nb_updated_k_iam, nb_updated_k_random = \
                preimage_iam_random_mix(Gn, Gn_median,
                alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, 
                l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, 
                InitRandomWithAllDk=InitRandomWithAllDk,
                params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
                            'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                            'removeNodes': removeNodes, 'connected': connected_iam},
                params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
                            'saveGXL': saveGXL})
            # the stored Gram matrix computation time is added to the run time.
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[idx_nb].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat) 
            dis_ks_min_list[idx_nb].append(dhat)
            g_best[idx_nb].append(ghat_list)
            print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
            nb_updated_list_iam[idx_nb].append(nb_updated_iam)
            print('\nnumber of updates of the best graph by random generation: ', 
                  nb_updated_random)
            nb_updated_list_random[idx_nb].append(nb_updated_random)
            print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
            nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam)
            print('\nnumber of updates of k nearest graphs by random generation: ', 
                  nb_updated_k_random)
            nb_updated_k_list_random[idx_nb].append(nb_updated_k_random) 
            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), 
                    with_labels=True)
            plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + 
                        '_k' + str(k) + '.png', format="PNG")
            plt.clf()
            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, 
                                         ged_method=ged_method, saveGXL=saveGXL)
            sod_gs_list[idx_nb].append(sod_tmp)
            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ', 
          sod_gs_min_list)  
    print('\nsmallest distance in kernel space for each set of median graphs and k: ', 
          dis_ks_min_list) 
    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', 
          nb_updated_list_iam)
    print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ', 
          nb_updated_list_random)
    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', 
          nb_updated_k_list_iam)
    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ', 
          nb_updated_k_list_random)
    print('\ntimes:', time_list)
 ###############################################################################
 # tests on different numbers of median-sets.
 def test_preimage_mix_median_nb():
    """Run the mixed IAM / random pre-image experiment for several median-set
    sizes with a fixed number of nearest neighbors (``k = 5``).

    The MUTAG graphs are loaded, passed through ``remove_edges`` and
    restricted to the indices that ``get_same_item_indices(y_all)`` lists
    under key 1. For every size in ``nb_median_range`` a median set is sampled
    (the random seed is reset to 1 before each draw), the pre-computed Gram
    matrix is extended with rows/columns for the sampled graphs, and
    ``preimage_iam_random_mix`` is run once. The first returned graph is drawn
    to ``results/preimage_mix/`` and its ``ged_median`` value against the
    median set is recorded. All collected statistics are printed; nothing is
    returned.

    Requires ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``) and the ``results/preimage_mix/`` folder.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 5 # iteration limit for pre-image.
    l_max = 500 # update limit for random generation
    k = 5 # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    ged_cost='CHEM_1'
    ged_method='IPFP'
    saveGXL='gedlib'
    # parameters for IAM function
    c_ei=1
    c_er=1
    c_es=1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False
    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    # The Gram matrix file was produced beforehand with:
    #    time0 = time.time()
    #    km = compute_kernel(Gn, gkernel, True)
    #    time_km = time.time() - time0
    #    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
    # It does not depend on the median set, so it is read once; the context
    # manager closes the .npz archive instead of leaking its file handle.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list_iam = []
    nb_updated_list_random = []
    nb_updated_k_list_iam = []
    nb_updated_k_list_random = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # same seed for every size, so the draws are reproducible.
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        ###################################################################
        # build the mixed Gram matrix: the first len(Gn) rows/columns are the
        # candidate graphs, the last nb_median ones repeat the median graphs.
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
        ###################################################################
        # every median graph gets the same weight.
        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
            nb_updated_k_iam, nb_updated_k_random = \
            preimage_iam_random_mix(Gn, Gn_median,
            alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, 
            l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, 
            InitRandomWithAllDk=InitRandomWithAllDk,
            params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
                        'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                        'removeNodes': removeNodes, 'connected': connected_iam},
            params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
                        'saveGXL': saveGXL})
        # the stored Gram matrix computation time is added to the run time.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat) 
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
        nb_updated_list_iam.append(nb_updated_iam)
        print('\nnumber of updates of the best graph by random generation: ', 
              nb_updated_random)
        nb_updated_list_random.append(nb_updated_random)
        print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
        nb_updated_k_list_iam.append(nb_updated_k_iam)
        print('\nnumber of updates of k nearest graphs by random generation: ', 
              nb_updated_k_random)
        nb_updated_k_list_random.append(nb_updated_k_random) 
        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), 
                with_labels=True)
        plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + 
                    '.png', format="PNG")
        plt.clf()
        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, 
                                     ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)  
    print('\nsmallest distance in kernel space for each set of median graphs: ', 
          dis_ks_min_list) 
    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', 
          nb_updated_list_iam)
    print('\nnumber of updates of the best graph for each set of median graphs by random generation: ', 
          nb_updated_list_random)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', 
          nb_updated_k_list_iam)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ', 
          nb_updated_k_list_random)
    print('\ntimes:', time_list)
 ###############################################################################
 # test on the combination of the two randomly chosen graphs. (the same as in the
 # random pre-image paper.)
 def test_preimage_mix_2combination_all_pairs():
    """Run the mixed IAM / random pre-image experiment on the weighted
    combination of two graphs.

    The MUTAG graphs are loaded and passed through ``remove_edges``. For each
    tested pair ``(idx1, idx2)`` (currently only ``(187, 167)``) both graphs
    are drawn to ``results/preimage_mix/``, the two extra rows/columns of the
    pre-computed Gram matrix are filled with the kernel values of the pair,
    and ``preimage_iam_random_mix`` is run for every ``alpha`` in
    ``alpha_range`` with weights ``[alpha, 1 - alpha]``. The first returned
    graph of each run is drawn to file and its ``ged_median`` value against
    the pair is recorded. The statistics are printed and a summary line is
    prepended to ``results/preimage_mix/nb_updates.txt`` (the file is created
    when it does not exist yet). Nothing is returned.

    Requires ``results/gram_matrix_marg_itr10_pq0.03.gm.npz`` (arrays ``gm``
    and ``gmtime``); ``gm`` must have at least ``len(Gn) + 2`` rows and
    columns.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 10 # iteration limit for pre-image.
    l_max = 500 # update limit for random generation
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 5 # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    ged_cost='CHEM_1'
    ged_method='IPFP'
    saveGXL='gedlib'
    # parameters for IAM function
    c_ei=1
    c_er=1
    c_es=1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False
    nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
    nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
    nb_updates_file = 'results/preimage_mix/nb_updates.txt'
    # test on each pair of graphs. To test all pairs use instead:
    #    for idx1 in range(len(Gn) - 1, -1, -1):
    #        for idx2 in range(idx1, -1, -1):
    for idx1 in range(187, 188):
        for idx2 in range(167, 168):
            g1 = Gn[idx1].copy()
            g2 = Gn[idx2].copy()
            # file names follow the pair indices instead of being hard-coded.
            nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
            plt.savefig('results/preimage_mix/mutag' + str(idx1) + '.png', format="PNG")
            plt.show()
            plt.clf()
            nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
            plt.savefig('results/preimage_mix/mutag' + str(idx2) + '.png', format="PNG")
            plt.show()
            plt.clf()
            ###################################################################
            # The Gram matrix file can be produced beforehand with:
            #    Gn_mix = [g.copy() for g in Gn]
            #    Gn_mix.append(g1.copy())
            #    Gn_mix.append(g2.copy())
            #    time0 = time.time()
            #    km = compute_kernel(Gn_mix, gkernel, True)
            #    time_km = time.time() - time0
            #    np.savez(<file name without '.npz'>, gm=km, gmtime=time_km)
            ###################################################################
            # the context manager closes the .npz archive instead of leaking
            # its file handle.
            with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
                km = gmfile['gm']
                time_km = gmfile['gmtime']
            # modify mixed gram matrix: rows/columns len(Gn) and len(Gn) + 1
            # stand for g1 and g2.
            for i in range(len(Gn)):
                km[i, len(Gn)] = km[i, idx1]
                km[i, len(Gn) + 1] = km[i, idx2]
                km[len(Gn), i] = km[i, idx1]
                km[len(Gn) + 1, i] = km[i, idx2]
            km[len(Gn), len(Gn)] = km[idx1, idx1]
            km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
            km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
            km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
            ###################################################################
            time_list = []
            dis_ks_min_list = []
            sod_gs_list = []
            sod_gs_min_list = []
            nb_updated_list_iam = []
            nb_updated_list_random = []
            nb_updated_k_list_iam = []
            nb_updated_k_list_random = []
            g_best = []
            # for each alpha
            for alpha in alpha_range:
                print('\n-------------------------------------------------------\n')
                print('alpha =', alpha)
                time0 = time.time()
                dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
                    nb_updated_k_iam, nb_updated_k_random = \
                    preimage_iam_random_mix(Gn, [g1, g2],
                    [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, 
                    l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, 
                    InitRandomWithAllDk=InitRandomWithAllDk,
                    params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
                                'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                                'removeNodes': removeNodes, 'connected': connected_iam},
                    params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
                                'saveGXL': saveGXL})
                # the stored Gram matrix computation time is added to the run time.
                time_total = time.time() - time0 + time_km
                print('time: ', time_total)
                time_list.append(time_total)
                dis_ks_min_list.append(dhat)
                g_best.append(ghat_list)
                nb_updated_list_iam.append(nb_updated_iam)       
                nb_updated_list_random.append(nb_updated_random)
                nb_updated_k_list_iam.append(nb_updated_k_iam)       
                nb_updated_k_list_random.append(nb_updated_k_random) 
            # show best graphs and save them to file.
            for idx, item in enumerate(alpha_range):
                print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
                print('one of the possible corresponding pre-images is')
                nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), 
                        with_labels=True)
                plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) 
                            + '_alpha' + str(item) + '.png', format="PNG")
                plt.clf()
            # compute the corresponding sod in graph space, using the first
            # best graph found for each alpha (one entry of g_best per alpha).
            for ghat_list in g_best:
                sod_tmp, _ = ged_median([ghat_list[0]], [g1, g2], ged_cost=ged_cost, 
                                             ged_method=ged_method, saveGXL=saveGXL)
                sod_gs_list.append(sod_tmp)
                sod_gs_min_list.append(np.min(sod_tmp))
            print('\nsods in graph space: ', sod_gs_list)
            print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)  
            print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) 
            print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
            print('\nnumber of updates of the best graph for each alpha by random generation: ', 
                  nb_updated_list_random)
            print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', 
                  nb_updated_k_list_iam)
            print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', 
                  nb_updated_k_list_random)
            print('\ntimes:', time_list)
            nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
            nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
            str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
                % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
            # prepend the new line to the log; a missing log file is treated
            # as empty instead of aborting the experiment at its very end.
            try:
                with open(nb_updates_file, 'r') as file:
                    content = file.read()
            except FileNotFoundError:
                content = ''
            with open(nb_updates_file, 'w') as file:
                file.write(str_fw + content)
 ###############################################################################
 if __name__ == '__main__':
 # Script entry point. Each experiment below is switched on by uncommenting
 # its call; only the grid experiment is currently enabled.
 ###############################################################################
 # test on the combination of two randomly chosen graphs (the same setting as
 # in the random pre-image paper).
 #    test_preimage_mix_2combination_all_pairs()
 ###############################################################################
 # tests on different numbers of graphs in the median set.
 #    test_preimage_mix_median_nb()
 ###############################################################################
 # tests on a grid of median-set sizes and numbers of nearest neighbors k.
    test_preimage_mix_grid_k_median_nb()
--- a/preimage/test_preimage_random.py
+++ b/preimage/test_preimage_random.py
@@ -0,0 +1,402 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Thu Sep  5 15:59:00 2019
@author: ljia
 """
 import numpy as np
 import networkx as nx
 import matplotlib.pyplot as plt
 import time
 import random
 #from tqdm import tqdm
 #import os
 import sys
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
 from preimage_random import preimage_random
 from ged import ged_median
 from utils import compute_kernel, get_same_item_indices, remove_edges
 ###############################################################################
 # tests on different values on grid of median-sets and k.
 def test_preimage_random_grid_k_median_nb():
    """Run the random pre-image experiment on a grid of median-set sizes and
    numbers of nearest neighbors ``k``.

    The MUTAG graphs are loaded, passed through ``remove_edges`` and
    restricted to the indices that ``get_same_item_indices(y_all)`` lists
    under key 1. For every size in ``nb_median_range`` a median set is sampled
    (the random seed is reset to 1 before each draw), the pre-computed Gram
    matrix is extended with rows/columns for the sampled graphs, and
    ``preimage_random`` is run for every ``k`` in ``k_range``. For each run the
    returned graph is drawn to ``results/preimage_random/`` and its
    ``ged_median`` value against the median set is recorded. All collected
    statistics are printed; nothing is returned.

    Requires ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``) and the ``results/preimage_random/`` folder.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 5 # iteration limit for pre-image.
    l_max = 500 # update limit for random generation
    # parameters for GED function
    ged_cost='CHEM_1'
    ged_method='IPFP'
    saveGXL='gedlib'
    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # number of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    # The Gram matrix file was produced beforehand with:
    #    time0 = time.time()
    #    km = compute_kernel(Gn, gkernel, True)
    #    time_km = time.time() - time0
    #    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
    # It does not depend on the median set, so it is read once; the context
    # manager closes the .npz archive instead of leaking its file handle.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for idx_nb, nb_median in enumerate(nb_median_range):
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # same seed for every size, so the draws are reproducible.
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        ###################################################################
        # build the mixed Gram matrix: the first len(Gn) rows/columns are the
        # candidate graphs, the last nb_median ones repeat the median graphs.
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
        ###################################################################
        # every median graph gets the same weight.
        alpha_range = [1 / nb_median] * nb_median
        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list.append([])
        g_best.append([])
        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range, 
                range(len(Gn), len(Gn) + nb_median), km, k, r_max, l_max, gkernel)
            # the stored Gram matrix computation time is added to the run time.
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[idx_nb].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat) 
            dis_ks_min_list[idx_nb].append(dhat)
            g_best[idx_nb].append(ghat)
            print('\nnumber of updates of the best graph: ', nb_updated)
            nb_updated_list[idx_nb].append(nb_updated)
            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'), 
                    with_labels=True)
            plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) + 
                        '_k' + str(k) + '.png', format="PNG")
            plt.clf()
            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost, 
                                         ged_method=ged_method, saveGXL=saveGXL)
            sod_gs_list[idx_nb].append(sod_tmp)
            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ', 
          sod_gs_min_list)  
    print('\nsmallest distance in kernel space for each set of median graphs and k: ', 
          dis_ks_min_list) 
    # only random generation is used here, so the label no longer says "by IAM".
    print('\nnumber of updates of the best graph for each set of median graphs and k: ', 
          nb_updated_list)
    print('\ntimes:', time_list)
 ###############################################################################
 # tests on different numbers of median-sets.
 def _extend_gram_matrix_with_medians(km_base, nb_graphs, idx_median):
    """Extend a dataset Gram matrix with one extra row/column per median graph.

    The median graphs are copies of dataset graphs, so their kernel values are
    copied from the rows/columns of the graphs they were drawn from.

    Parameters
    ----------
    km_base : numpy.ndarray
        Stored Gram matrix; only its upper triangle over the first
        ``nb_graphs`` rows/columns is read and then mirrored.
    nb_graphs : int
        Number of dataset graphs.
    idx_median : sequence of int
        Dataset indices of the graphs chosen as median graphs.

    Returns
    -------
    numpy.ndarray
        Square matrix of size ``nb_graphs + len(idx_median)``; rows/columns
        ``nb_graphs + j`` correspond to the j-th median graph.
    """
    idx_median = list(idx_median)
    nb_total = nb_graphs + len(idx_median)
    km = np.zeros((nb_total, nb_total))
    # Dataset block: keep the upper triangle and mirror it onto the lower one.
    km[:nb_graphs, :nb_graphs] = km_base[:nb_graphs, :nb_graphs]
    lower = np.tril_indices(nb_graphs, k=-1)
    km[lower] = km.T[lower]
    # Dataset-vs-median blocks: column j is the column of the graph it copies.
    cross = km[:nb_graphs, idx_median]
    km[:nb_graphs, nb_graphs:] = cross
    km[nb_graphs:, :nb_graphs] = cross.T
    # Median-vs-median block.
    km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_median, idx_median)]
    return km


 def test_preimage_random_median_nb():
    """Run the random pre-image search for median sets of increasing size.

    For every size in ``nb_median_range`` a set of graphs is sampled (with a
    fixed seed) from the MUTAG graphs labelled 1, the pre-image of their
    uniformly weighted combination is searched with ``preimage_random``, the
    best graph is drawn to ``results/preimage_random/`` and its sum of
    distances to the median set is computed with ``ged_median``. Summary
    lists are printed at the end. Nothing is returned.

    The Gram matrix is read from a pre-computed ``.npz`` file; it can be
    regenerated with::

        time0 = time.time()
        km = compute_kernel(Gn, gkernel, True)
        time_km = time.time() - time0
        np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
                 gm=km, gmtime=time_km)
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    k = 5  # k nearest neighbors

    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # keep only the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The stored Gram matrix does not depend on the median set, so it is read
    # once here; the context manager closes the underlying .npz archive.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Gram matrix of the dataset followed by the median graphs.
        km = _extend_gram_matrix_with_medians(km_tmp, nb_graphs, idx_rdm)

        # every median graph gets the same weight.
        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
            range(nb_graphs, nb_graphs + nb_median), km, k, r_max, l_max, gkernel)
        # the stored Gram matrix computation time is counted in the total.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        print('\nnumber of updates of the best graph: ', nb_updated)
        nb_updated_list.append(nb_updated)

        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                with_labels=True)
        plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs: ',
          nb_updated_list)
    print('\ntimes:', time_list)
 ###############################################################################
 # test on the combination of the two randomly chosen graphs. (the same as in the
 # random pre-image paper.)
 def test_random_preimage_2combination():
    """Search pre-images of weighted combinations of two fixed MUTAG graphs.

    For every ``alpha`` on an 11-point grid over [0, 1] the pre-image of
    ``alpha * g1 + (1 - alpha) * g2`` is searched with ``preimage_random``.
    The best graph for each ``alpha`` is then drawn, saved to
    ``results/random_preimage/`` and printed, followed by summary lists.
    Nothing is returned.

    The Gram matrix is read from a pre-computed ``.npz`` file. Its last two
    rows/columns are overwritten here with the kernel values of ``g1`` and
    ``g2``, so the stored matrix is presumably of size ``len(Gn) + 2``
    (computed on the dataset plus copies of the two graphs) -- TODO confirm.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    alpha_range = np.linspace(0, 1, 11)
    k = 5  # k nearest neighbors

    # the two molecules to combine (fixed indices; the seed is still set so
    # that any later use of numpy's global random state is reproducible).
    np.random.seed(1)
    idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2)
    idx1, idx2 = idx_gi
    g1 = Gn[idx1].copy()
    g2 = Gn[idx2].copy()

    # Read the stored Gram matrix; the context manager closes the archive.
    with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
        km = gmfile['gm']
        time_km = gmfile['gmtime']

    # modify mixed gram matrix: rows/columns len(Gn) and len(Gn) + 1 stand for
    # g1 and g2 and take the kernel values of the graphs they copy.
    nb_graphs = len(Gn)
    for i in range(nb_graphs):
        km[i, nb_graphs] = km[i, idx1]
        km[i, nb_graphs + 1] = km[i, idx2]
        km[nb_graphs, i] = km[i, idx1]
        km[nb_graphs + 1, i] = km[i, idx2]
    km[nb_graphs, nb_graphs] = km[idx1, idx1]
    km[nb_graphs, nb_graphs + 1] = km[idx1, idx2]
    km[nb_graphs + 1, nb_graphs] = km[idx2, idx1]
    km[nb_graphs + 1, nb_graphs + 1] = km[idx2, idx2]

    time_list = []
    nb_updated_list = []
    g_best = []
    dis_ks_min_list = []
    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha],
                                                 range(nb_graphs, nb_graphs + 2), km,
                                                 k, r_max, l_max, gkernel)
        # the stored Gram matrix computation time is counted in the total.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for item, dis_min, graph in zip(alpha_range, dis_ks_min_list, g_best):
        print('when alpha is', item, 'the shortest distance is', dis_min)
        print('one of the possible corresponding pre-images is')
        nx.draw(graph, labels=nx.get_node_attributes(graph, 'atom'),
                with_labels=True)
        # Save before showing: after show() returns the figure may already be
        # closed, and savefig would then write an empty image.
        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
        plt.show()
        plt.clf()
        print(graph.nodes(data=True))
        print(graph.edges(data=True))

    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
 ###############################################################################
 if __name__ == "__main__":
    # Script entry point. Only the median-number experiment is enabled; the
    # other drivers are kept here, commented out, for reference.
    #
    # Combination of two chosen graphs (the same setting as in the random
    # pre-image paper):
    #     test_random_preimage_2combination()
    #
    # Grid over the number of median graphs and k:
    #     test_preimage_random_grid_k_median_nb()
    #
    # Different numbers of median graphs:
    test_preimage_random_median_nb()
--- a/preimage/utils.py
+++ b/preimage/utils.py
@@ -0,0 +1,109 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Thu Oct 17 19:05:07 2019
 Useful functions.
@author: ljia
 """
 #import networkx as nx
 import multiprocessing
 import numpy as np
 import sys
 sys.path.insert(0, "../")
 from pygraph.kernels.marginalizedKernel import marginalizedkernel
 from pygraph.kernels.untilHPathKernel import untilhpathkernel
 from pygraph.kernels.spKernel import spkernel
 import functools
 from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
 from pygraph.kernels.structuralspKernel import structuralspkernel
 def remove_edges(Gn):
    """Empty the attribute dictionary of every edge of every graph in ``Gn``.

    Despite its name, this does not delete any edge: each graph keeps its
    edges and only the data attached to them is cleared, in place. Each graph
    must provide ``edges(data=True)`` yielding ``(u, v, attrs)`` triples.
    Returns ``None``.
    """
    edge_attr_dicts = (
        edge_attrs
        for graph in Gn
        for _source, _target, edge_attrs in graph.edges(data=True)
    )
    for edge_attrs in edge_attr_dicts:
        edge_attrs.clear()
 def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    """Kernel-space distance between one graph and a weighted combination.

    Computes ``sqrt(k(g, g) - 2 * sum_i alpha_i * k(g, g_i) + term3)`` where
    the kernel values are read from ``Kmatrix``.

    Parameters
    ----------
    idx_g : int
        Row/column of the graph ``g`` in ``Kmatrix``.
    idx_gi : sequence of int
        Rows/columns of the combined graphs ``g_i``; indexed in step with
        ``alpha``.
    alpha : sequence of float
        Weight of each ``g_i``.
    Kmatrix : numpy.ndarray
        Gram matrix, indexed as ``Kmatrix[i, j]``.
    term3 : float, optional
        Value of ``sum_ij alpha_i * alpha_j * k(g_i, g_j)`` when the caller
        has already computed it (it does not depend on ``g``).
    withterm3 : bool, optional
        ``True`` (default) means ``term3`` is supplied and used as is;
        ``False`` means it is computed here and added to the ``term3`` passed.

    Returns
    -------
    float
        The distance. A squared distance that is negative only through
        floating-point round-off (above ``-1e-10``) is treated as zero instead
        of yielding NaN; a clearly negative one still produces NaN.
    """
    term1 = Kmatrix[idx_g, idx_g]
    term2 = 2 * sum(a * Kmatrix[idx_g, idx_gi[i]] for i, a in enumerate(alpha))
    if not withterm3:
        for i1, a1 in enumerate(alpha):
            for i2, a2 in enumerate(alpha):
                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    sq_dis = term1 - term2 + term3
    # Same tolerance as kernel_distance_matrix: identical graphs can give a
    # squared distance of about -1e-16, whose square root would be NaN.
    if -1e-10 < sq_dis < 0:
        sq_dis = 0
    return np.sqrt(sq_dis)
 def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` with the named graph kernel.

    Parameters
    ----------
    Gn : list
        Graphs passed to the kernel function; nodes are labelled by 'atom'.
    graph_kernel : str
        One of 'marginalizedkernel', 'untilhpathkernel', 'spkernel' or
        'structuralspkernel'. The kernel hyper-parameters are fixed here.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    numpy.ndarray
        Symmetric matrix with entries ``K[i, j] / sqrt(K[i, i] * K[j, j])``,
        taken from the upper triangle of the raw kernel matrix.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the supported names.
    """
    n_jobs = multiprocessing.cpu_count()
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                        p_quit=0.03, n_iteration=10, remove_totters=False,
                                        n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                      depth=10, k_func='MinMax', compute_method='trie',
                                      n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
                                 {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                                 n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
                                        {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                                        n_jobs=n_jobs, verbose=verbose)
    else:
        # Without this branch an unknown name fell through to the
        # normalization and failed with an unrelated UnboundLocalError.
        raise ValueError('Unknown graph kernel: ' + repr(graph_kernel) + '.')

    # normalization
    Kmatrix_diag = Kmatrix.diagonal()
    Kmatrix = Kmatrix / np.sqrt(np.outer(Kmatrix_diag, Kmatrix_diag))
    # Mirror the upper triangle onto the lower one so the result is exactly
    # symmetric even if the raw kernel matrix was not.
    lower = np.tril_indices_from(Kmatrix, k=-1)
    Kmatrix[lower] = Kmatrix.T[lower]
    return Kmatrix
 def gram2distances(Kmatrix):
    """Turn a Gram matrix into the matrix of kernel-induced distances.

    Entry ``[i, j]`` of the result is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.

    Parameters
    ----------
    Kmatrix : numpy.ndarray
        Gram matrix; the leading ``len(Kmatrix)`` rows and columns are used.

    Returns
    -------
    numpy.ndarray
        Float matrix of shape ``(len(Kmatrix), len(Kmatrix))``. Squared
        distances that are negative only through floating-point round-off
        (above ``-1e-10``) are treated as zero instead of yielding NaN;
        clearly negative ones still produce NaN.
    """
    nb_items = len(Kmatrix)
    if nb_items == 0:
        return np.zeros((0, 0))
    gram = np.asarray(Kmatrix, dtype=float)[:nb_items, :nb_items]
    diag = gram.diagonal()
    sq_dis = diag[:, np.newaxis] + diag[np.newaxis, :] - 2 * gram
    # Same tolerance as kernel_distance_matrix: near-identical items can give
    # a squared distance of about -1e-16, whose square root would be NaN.
    sq_dis[(sq_dis < 0) & (sq_dis > -1e-10)] = 0
    return np.sqrt(sq_dis)
 def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
    """Compute pairwise kernel-space distances between the graphs in ``Gn``.

    Parameters
    ----------
    Gn : list
        Graphs; only ``len(Gn)`` is used when ``Kmatrix`` is supplied.
    Kmatrix : numpy.ndarray, optional
        Gram matrix of ``Gn``. When ``None`` it is computed with
        ``compute_kernel(Gn, gkernel, True)``.
    gkernel : str, optional
        Kernel name forwarded to ``compute_kernel`` when ``Kmatrix`` is None.

    Returns
    -------
    tuple
        ``(dis_mat, dis_max, dis_min, dis_mean)``: the symmetric distance
        matrix, its largest entry, its smallest non-zero entry (``0.0`` when
        every distance is zero) and the mean over all entries.

    Raises
    ------
    ValueError
        If a squared distance is below ``-1e-10``, i.e. negative beyond what
        floating-point round-off explains.
    """
    # ``is None`` rather than ``== None``: comparing an ndarray with ``==``
    # is elementwise and cannot be used as a condition.
    if Kmatrix is None:
        Kmatrix = compute_kernel(Gn, gkernel, True)
    nb_graphs = len(Gn)
    dis_mat = np.empty((nb_graphs, nb_graphs))
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
            if dis < 0:
                if dis > -1e-10:
                    # round-off on (near-)identical graphs.
                    dis = 0
                else:
                    raise ValueError('The distance is negative.')
            dis_mat[i, j] = np.sqrt(dis)
            dis_mat[j, i] = dis_mat[i, j]
    dis_max = np.max(dis_mat)
    nonzero_dis = dis_mat[dis_mat != 0]
    # With a single graph, or only identical graphs, there is no non-zero
    # distance to take the minimum of.
    dis_min = np.min(nonzero_dis) if nonzero_dis.size > 0 else 0.0
    dis_mean = np.mean(dis_mat)
    return dis_mat, dis_max, dis_min, dis_mean
 def get_same_item_indices(ls):
    """Group the positions of ``ls`` by the value found at each position.

    Returns a plain dict mapping every distinct item of ``ls`` to the list of
    indices where it occurs, in increasing order. Items must be hashable.
    """
    positions_by_item = {}
    for position, value in enumerate(ls):
        positions_by_item.setdefault(value, []).append(position)
    return positions_by_item