diff --git a/notebooks/run_spkernel.py b/notebooks/run_spkernel.py
index 0f98b3e..fcae61f 100644
--- a/notebooks/run_spkernel.py
+++ b/notebooks/run_spkernel.py
@@ -74,6 +74,7 @@ for ds in dslist:
             extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
             ds_name=ds['name'],
             n_jobs=multiprocessing.cpu_count(),
+#            n_jobs=7,
             read_gm_from_file=False,
             verbose=True)
     print()
diff --git a/preimage/fitDistance.py b/preimage/fitDistance.py
index 81ef711..5268014 100644
--- a/preimage/fitDistance.py
+++ b/preimage/fitDistance.py
@@ -18,31 +18,44 @@ from scipy import optimize
 import cvxpy as cp
 
 import sys
-sys.path.insert(0, "../")
+#sys.path.insert(0, "../")
 from ged import GED, get_nb_edit_operations
 from utils import kernel_distance_matrix
 
-def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
+def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
+                               fitkernel=None, gamma=1.0):
     # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
-    random.seed(1)
-    cost_rdm = random.sample(range(1, 10), 5)
-    edit_costs = cost_rdm + [0]
+#    random.seed(1)
+    cost_rdm = random.sample(range(1, 10), 6)
+#    edit_costs = cost_rdm + [0]
+    edit_costs = cost_rdm
+#    edit_costs = [i * 0.01 for i in cost_rdm] + [0]
 #    edit_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
 #    edit_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
 #    edit_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0]
-    idx_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
+    idx_cost_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
 
     # compute distances in feature space.
-    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
+    coef_dk = 1
+    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, gkernel=gkernel)
     dis_k_vec = []
     for i in range(len(dis_k_mat)):
         for j in range(i, len(dis_k_mat)):
            dis_k_vec.append(dis_k_mat[i, j])
     dis_k_vec = np.array(dis_k_vec)
+    if fitkernel == None:
+        dis_k_vec_adjusted = dis_k_vec
+    elif fitkernel == 'gaussian':
+        coef_dk = 1 / np.max(dis_k_vec)
+        idx_dk_nonzeros = np.where(dis_k_vec != 0)[0]
+        # remove 0's and constrain d_k to (0, 1].
+        dis_k_vec = dis_k_vec[idx_dk_nonzeros] * coef_dk
+        dis_k_vec_adjusted = np.sqrt(-np.log(dis_k_vec) / gamma)
 
     residual_list = []
     edit_cost_list = []
     time_list = []
+    nb_cost_mat_list = []
 
     for itr in range(itr_max):
         print('\niteration', itr)
@@ -52,15 +65,23 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
         edit_cost_list.append(edit_cost_constant)
         ged_all, ged_mat, n_edit_operations = compute_geds(Gn, edit_cost_constant,
-                                                           idx_nonzeros, parallel=True)
+                                                           idx_cost_nonzeros, parallel=True)
 
-        residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
+        if fitkernel == None:
+            residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
+        elif fitkernel == 'gaussian':
+            ged_all = np.array(ged_all)[idx_dk_nonzeros]
+            residual = np.sqrt(np.sum(np.square(
+                    np.exp(-gamma * ged_all ** 2) / coef_dk - dis_k_vec)))
         residual_list.append(residual)
 
         # "fit" geds to distances in feature space by tuning edit costs using the
         # Least Squares Method.
         nb_cost_mat = np.array(n_edit_operations).T
-        edit_costs_new, residual = compute_better_costs(nb_cost_mat, dis_k_vec)
+        if fitkernel == 'gaussian':
+            nb_cost_mat = nb_cost_mat[idx_dk_nonzeros]
+        nb_cost_mat_list.append(nb_cost_mat)
+        edit_costs_new, residual = compute_better_costs(nb_cost_mat, dis_k_vec_adjusted)
         print('pseudo residual:', residual)
 
         for i in range(len(edit_costs_new)):
@@ -70,7 +91,7 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
             else:
                 raise ValueError('The edit cost is negative.')
 
-        for idx, item in enumerate(idx_nonzeros):
+        for idx, item in enumerate(idx_cost_nonzeros):
             edit_costs[item] = edit_costs_new[idx]
 
         time_list.append(time.time() - time0)
@@ -78,14 +99,21 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
         print('edit_costs:', edit_costs)
         print('residual_list:', residual_list)
-        
+        print()
 
     edit_cost_list.append(edit_costs)
     ged_all, ged_mat, n_edit_operations = compute_geds(Gn, edit_costs,
-                                                       idx_nonzeros, parallel=True)
-    residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
+                                                       idx_cost_nonzeros, parallel=True)
+    if fitkernel == None:
+        residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
+    elif fitkernel == 'gaussian':
+        ged_all = np.array(ged_all)[idx_dk_nonzeros]
+        residual = np.sqrt(np.sum(np.square(
+                np.exp(-gamma * ged_all ** 2) / coef_dk - dis_k_vec)))
    residual_list.append(residual)
+    nb_cost_mat_list.append(np.array(n_edit_operations).T)
 
-    return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list
+    return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, \
+        time_list, nb_cost_mat_list, coef_dk
 
 
 def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False):
@@ -193,7 +221,10 @@ def compute_better_costs(nb_cost_mat, dis_k_vec):
 #    h = np.array([0 for i in range(nb_cost_mat.shape[1])])
     x = cp.Variable(nb_cost_mat.shape[1])
     cost = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
-    constraints = [x >= [0 for i in range(nb_cost_mat.shape[1])]]
+    constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
+#                   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
+                   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
+                   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
     prob = cp.Problem(cp.Minimize(cost), constraints)
     prob.solve()
     edit_costs_new = x.value
diff --git a/preimage/ged.py b/preimage/ged.py
index 49b4403..eaa7294 100644
--- a/preimage/ged.py
+++ b/preimage/ged.py
@@ -9,11 +9,14 @@ import numpy as np
 import networkx as nx
 from tqdm import tqdm
 import sys
+import multiprocessing
+from multiprocessing import Pool
+from functools import partial
 
 from gedlibpy import librariesImport, gedlibpy
 
 def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP',
-        edit_cost_constant=[], saveGXL='benoit', stabilizer='min', repeat=50):
+        edit_cost_constant=[], stabilizer='min', repeat=50):
     """
     Compute GED for 2 graphs.
""" @@ -25,9 +28,11 @@ def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', G_new = nx.Graph() for nd, attrs in G.nodes(data=True): G_new.add_node(str(nd), chem=attrs['atom']) +# G_new.add_node(str(nd), x=str(attrs['attributes'][0]), +# y=str(attrs['attributes'][1])) for nd1, nd2, attrs in G.edges(data=True): -# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) - G_new.add_edge(str(nd1), str(nd2)) + G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) +# G_new.add_edge(str(nd1), str(nd2)) return G_new @@ -49,6 +54,32 @@ def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', pi_backward = gedlibpy.get_backward_map(g, h) upper = gedlibpy.get_upper_bound(g, h) lower = gedlibpy.get_lower_bound(g, h) + elif stabilizer == 'mean': + # @todo: to be finished... + upper_list = [np.inf] * repeat + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_list[itr] = gedlibpy.get_upper_bound(g, h) + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + upper = np.mean(upper_list) + elif stabilizer == 'median': + if repeat % 2 == 0: + repeat += 1 + upper_list = [np.inf] * repeat + pi_forward_list = [0] * repeat + pi_backward_list = [0] * repeat + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_list[itr] = gedlibpy.get_upper_bound(g, h) + pi_forward_list[itr] = gedlibpy.get_forward_map(g, h) + pi_backward_list[itr] = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + upper = np.median(upper_list) + idx_median = upper_list.index(upper) + pi_forward = pi_forward_list[idx_median] + pi_backward = pi_backward_list[idx_median] elif stabilizer == 'min': upper = np.inf for itr in range(repeat): @@ -61,6 +92,18 @@ def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', lower = gedlibpy.get_lower_bound(g, h) if upper == 0: break + elif stabilizer == 'max': + upper = 0 + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_tmp = gedlibpy.get_upper_bound(g, h) + if upper_tmp > upper: + upper = upper_tmp + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + elif stabilizer == 'gaussian': + pass dis = upper @@ -138,23 +181,69 @@ def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', return dis, pi_forward, pi_backward -def ged_median(Gn, Gn_median, measure='ged', verbose=False, - ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'): - dis_list = [] - pi_forward_list = [] - for idx, G in tqdm(enumerate(Gn), desc='computing median distances', - file=sys.stdout) if verbose else enumerate(Gn): - dis_sum = 0 - pi_forward_list.append([]) - for G_p in Gn_median: - dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, - cost=ged_cost, method=ged_method, saveGXL=saveGXL) - pi_forward_list[idx].append(pi_tmp_forward) - dis_sum += dis_tmp - dis_list.append(dis_sum) +def ged_median(Gn, Gn_median, verbose=False, params_ged={'lib': 'gedlibpy', + 'cost': 'CHEM_1', 'method': 'IPFP', 'edit_cost_constant': [], + 'stabilizer': 'min', 'repeat': 50}, parallel=False): + if parallel: + len_itr = int(len(Gn)) + pi_forward_list = [[] for i in range(len_itr)] + dis_list = [0 for i in range(len_itr)] + + itr = range(0, len_itr) + n_jobs = multiprocessing.cpu_count() + if len_itr < 100 * n_jobs: + chunksize = int(len_itr / n_jobs) + 1 + else: + chunksize = 100 + def init_worker(gn_toshare, gn_median_toshare): + global G_gn, G_gn_median + G_gn = gn_toshare + G_gn_median = 
gn_median_toshare
+        do_partial = partial(_compute_ged_median, params_ged)
+        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn, Gn_median))
+        if verbose:
+            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
+                            desc='computing GEDs', file=sys.stdout)
+        else:
+            iterator = pool.imap_unordered(do_partial, itr, chunksize)
+        for i, dis_sum, pi_forward in iterator:
+            pi_forward_list[i] = pi_forward
+            dis_list[i] = dis_sum
+#            print('\n-------------------------------------------')
+#            print(i, j, idx_itr, dis)
+        pool.close()
+        pool.join()
+        
+    else:
+        dis_list = []
+        pi_forward_list = []
+        for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
+                           file=sys.stdout) if verbose else enumerate(Gn):
+            dis_sum = 0
+            pi_forward_list.append([])
+            for G_p in Gn_median:
+                dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
+                                                               **params_ged)
+                pi_forward_list[idx].append(pi_tmp_forward)
+                dis_sum += dis_tmp
+            dis_list.append(dis_sum)
+    
     return dis_list, pi_forward_list
 
 
+def _compute_ged_median(params_ged, itr):
+#    print(itr)
+    dis_sum = 0
+    pi_forward = []
+    for G_p in G_gn_median:
+        dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_gn[itr], G_p,
+                                                       **params_ged)
+        pi_forward.append(pi_tmp_forward)
+        dis_sum += dis_tmp
+        
+    return itr, dis_sum, pi_forward
+    
+
 def get_nb_edit_operations(g1, g2, forward_map, backward_map):
     """Compute the number of each edit operations.
     """
diff --git a/preimage/iam.py b/preimage/iam.py
index a7b01b9..fa38582 100644
--- a/preimage/iam.py
+++ b/preimage/iam.py
@@ -22,20 +22,22 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
                  epsilon=0.001, node_label='atom', edge_label='bond_type',
                  connected=False, removeNodes=True, allBestInit=False,
                  allBestNodes=False, allBestEdges=False, allBestOutput=False,
-                 params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP', 'saveGXL': 'benoit'}):
+                 params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP',
+                             'edit_cost_constant': [], 'stabilizer': 'min', 'repeat': 50}):
     """See my name, then you know what I do.
     """
 #    Gn_median = Gn_median[0:10]
 #    Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
-    if removeNodes:
-        node_ir = np.inf # corresponding to the node remove and insertion.
-        label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable.
+    node_ir = np.inf # corresponding to node removal and insertion.
+    label_r = 'thanksdanny' # the label for node removal. # @todo: make this label unrepeatable.
     ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
                                       attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
                                       edge_label=edge_label)
+    node_label_set = get_node_labels(Gn_median, node_label)
+    edge_label_set = get_edge_labels(Gn_median, edge_label)
 
-    def generate_graph(G, pi_p_forward, label_set):
+    def generate_graph(G, pi_p_forward):
         G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
 #        nx.draw_networkx(G)
 #        import matplotlib.pyplot as plt
@@ -52,7 +54,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
         for ndi, (nd, _) in enumerate(G.nodes(data=True)):
             h_i0_list = []
             label_list = []
-            for label in label_set:
+            for label in node_label_set:
                 h_i0 = 0
                 for idx, g in enumerate(Gn_median):
                     pi_i = pi_p_forward[idx][ndi]
@@ -62,7 +64,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
                 label_list.append(label)
             # case when the node is to be removed.
             if removeNodes:
-                h_i0_remove = 0 # @todo: maybe this can be added to the label_set above.
+ h_i0_remove = 0 # @todo: maybe this can be added to the node_label_set above. for idx, g in enumerate(Gn_median): pi_i = pi_p_forward[idx][ndi] if pi_i == node_ir: @@ -91,11 +93,10 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, G_new_list = [ggg.copy() for ggg in G_new_list_nd] else: # choose one of the best randomly. - h_ij0_max = h_i0_list[idx_max[0]] idx_rdm = random.randint(0, len(idx_max) - 1) best_label = label_list[idx_max[idx_rdm]] - - # check whether a_ij is 0 or 1. + h_i0_max = h_i0_list[idx_max[idx_rdm]] + g_new = G_new_list[0] if best_label == label_r: g_new.remove_node(nd) @@ -134,8 +135,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, # for nd1, nd2, _ in g_new.edges(data=True): h_ij0_list = [] label_list = [] - # @todo: compute edge label set before. - for label in get_edge_labels(Gn_median, edge_label): + for label in edge_label_set: h_ij0 = 0 for idx, g in enumerate(Gn_median): pi_i = pi_p_forward[idx][nd1i] @@ -176,9 +176,9 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, G_new_list_ed.append(g_tmp_copy) g_tmp_list = [ggg.copy() for ggg in G_new_list_ed] else: # choose one of the best randomly. - h_ij0_max = h_ij0_list[idx_max[0]] idx_rdm = random.randint(0, len(idx_max) - 1) best_label = label_list[idx_max[idx_rdm]] + h_ij0_max = h_ij0_list[idx_max[idx_rdm]] # check whether a_ij is 0 or 1. sij_norm = 0 @@ -192,6 +192,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, g_new.add_edge(nd1, nd2) g_new.edges[nd1, nd2][edge_label] = best_label else: +# elif h_ij0_max < len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es): if g_new.has_edge(nd1, nd2): g_new.remove_edge(nd1, nd2) g_tmp_list = [g_new] @@ -221,8 +222,8 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \ and not g_tmp.has_edge(nd1, nd2): g_tmp.add_edge(nd1, nd2) -# else: # @todo: which to use? - elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei): + else: # @todo: which to use? +# elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei): if g_tmp.has_edge(nd1, nd2): g_tmp.remove_edge(nd1, nd2) # do not change anything when equal. @@ -238,7 +239,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, # # find the best graph generated in this iteration and update pi_p. # @todo: should we update all graphs generated or just the best ones? dis_list, pi_forward_list = ged_median(G_new_list, Gn_median, - **params_ged) + params_ged=params_ged) # @todo: should we remove the identical and connectivity check? # Don't know which is faster. if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0: @@ -283,15 +284,16 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, # while itr < ite_max and (np.abs(old_sod - cur_sod) > epsilon or # np.abs(old_sod - cur_sod) == 0): while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon: +# while itr < ite_max: # for itr in range(0, 5): # the convergence condition? 
print('itr_iam is', itr) G_new_list = [] pi_forward_new_list = [] dis_new_list = [] for idx, g in enumerate(G_list): - label_set = get_node_labels(Gn_median + [g], node_label) +# label_set = get_node_labels(Gn_median + [g], node_label) G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph( - g, pi_forward_list[idx], label_set) + g, pi_forward_list[idx]) G_new_list += G_tmp_list pi_forward_new_list += pi_forward_tmp_list dis_new_list += dis_tmp_list @@ -325,7 +327,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, print('\nsods:', sod_list, '\n') - return G_list, pi_forward_list, dis_min + return G_list, pi_forward_list, dis_min, sod_list def remove_duplicates(Gn): @@ -363,7 +365,8 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, # compute set-median. dis_min = np.inf dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median, - **params_ged) + params_ged=params_ged, parallel=True) + print('finish computing GEDs.') # find all smallest distances. if allBestInit: # try all best init graphs. idx_min_list = range(len(dis_list)) @@ -371,19 +374,26 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, else: idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist() dis_min = [dis_list[idx_min_list[0]]] * len(idx_min_list) + idx_min_rdm = random.randint(0, len(idx_min_list) - 1) + idx_min_list = [idx_min_list[idx_min_rdm]] + sod_set_median = np.min(dis_min) # phase 2: iteration. G_list = [] dis_list = [] pi_forward_list = [] + G_set_median_list = [] +# sod_list = [] for idx_tmp, idx_min in enumerate(idx_min_list): # print('idx_min is', idx_min) G = Gn_candidate[idx_min].copy() + G_set_median_list.append(G.copy()) # list of edit operations. pi_p_forward = pi_forward_all[idx_min] # pi_p_backward = pi_all_backward[idx_min] - Gi_list, pi_i_forward_list, dis_i_min = iteration_proc(G, pi_p_forward, dis_min[idx_tmp]) + Gi_list, pi_i_forward_list, dis_i_min, sod_list = iteration_proc(G, + pi_p_forward, dis_min[idx_tmp]) G_list += Gi_list dis_list += [dis_i_min] * len(Gi_list) pi_forward_list += pi_i_forward_list @@ -409,9 +419,9 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, # print(g.edges(data=True)) # get the best median graphs - G_min_list, pi_forward_min_list, dis_min = best_median_graphs( + G_gen_median_list, pi_forward_min_list, sod_gen_median = best_median_graphs( G_list, pi_forward_list, dis_list) -# for g in G_min_list: +# for g in G_gen_median_list: # nx.draw_networkx(g) # plt.show() # print(g.nodes(data=True)) @@ -419,10 +429,10 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, if not allBestOutput: # randomly choose one graph. 
-        idx_rdm = random.randint(0, len(G_min_list) - 1)
-        G_min_list = [G_min_list[idx_rdm]]
+        idx_rdm = random.randint(0, len(G_gen_median_list) - 1)
+        G_gen_median_list = [G_gen_median_list[idx_rdm]]
 
-    return G_min_list, dis_min
+    return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median
diff --git a/preimage/median.py b/preimage/median.py
index 2844043..ed3e6cd 100644
--- a/preimage/median.py
+++ b/preimage/median.py
@@ -5,8 +5,8 @@ import numpy as np
 import networkx as nx
 import time
 
-import librariesImport
-import script
+from gedlibpy import librariesImport, gedlibpy
+#import script
 sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
 import pygraph
 from pygraph.utils.graphfiles import loadDataset
diff --git a/preimage/preimage_iam.py b/preimage/preimage_iam.py
index ff16955..bf79d0e 100644
--- a/preimage/preimage_iam.py
+++ b/preimage/preimage_iam.py
@@ -27,8 +27,9 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
                  params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
                              'ite_max': 50, 'epsilon': 0.001,
                              'removeNodes': True, 'connected': False},
-                 params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
-                             'saveGXL': 'benoit'}):
+                 params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP',
+                             'edit_cost_constant': [], 'stabilizer': 'min',
+                             'repeat': 50}):
     """This function constructs graph pre-image by the iterative pre-image
     framework in reference [1], algorithm 1, where the step of generating new
     graphs randomly is replaced by the IAM algorithm in reference [2].
@@ -91,12 +92,12 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
             ghat_new_list = []
             for g_tmp in Gk:
                 Gn_nearest_init = [g_tmp.copy()]
-                ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median,
+                ghat_new_list_tmp, _, _, _, _ = iam_upgraded(Gn_nearest_median,
                     Gn_nearest_init, params_ged=params_ged, **params_iam)
                 ghat_new_list += ghat_new_list_tmp
         else: # only the best graph in D_k is used to initialize IAM.
             Gn_nearest_init = [g.copy() for g in Gk]
-            ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
+            ghat_new_list, _, _, _, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
                 params_ged=params_ged, **params_iam)
 
 #        for g in g_tmp_list:
@@ -181,8 +182,9 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
                  params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
                              'ite_max': 50, 'epsilon': 0.001,
                              'removeNodes': True, 'connected': False},
-                 params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
-                             'saveGXL': 'benoit'}):
+                 params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1',
+                             'method': 'IPFP', 'edit_cost_constant': [],
+                             'stabilizer': 'min', 'repeat': 50}):
     """This function constructs graph pre-image by the iterative pre-image
     framework in reference [1], algorithm 1, where new graphs are generated
     randomly and by the IAM algorithm in reference [2].
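
Note on the fitting step above: the constrained least squares solved in compute_better_costs
(preimage/fitDistance.py) treats the six edit costs as variables, with each row of nb_cost_mat
counting the edit operations applied to one graph pair and dis_k_vec holding the target
kernel-space distances. A minimal, self-contained sketch of that optimization follows; the
operation counts and targets here are made-up stand-ins for the real nb_cost_mat and dis_k_vec:

    import numpy as np
    import cvxpy as cp

    rng = np.random.default_rng(0)
    nb_cost_mat = rng.random((10, 6))  # toy data: one row per graph pair, one column per
                                       # operation count (c_vi, c_vr, c_vs, c_ei, c_er, c_es)
    dis_k_vec = rng.random(10)         # toy target distances in kernel feature space

    x = cp.Variable(nb_cost_mat.shape[1])
    cost = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
    constraints = [x >= 0.01,                                      # keep every cost strictly positive
                   np.array([1., 1., -1., 0., 0., 0.]) @ x >= 0.,  # c_vs <= c_vi + c_vr
                   np.array([0., 0., 0., 1., 1., -1.]) @ x >= 0.]  # c_es <= c_ei + c_er
    prob = cp.Problem(cp.Minimize(cost), constraints)
    prob.solve()
    print(prob.status, x.value)        # fitted edit costs

The two linear constraints are the conditions the patch adds: substituting a node or edge label
should never cost more than removing and re-inserting it.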
diff --git a/preimage/test_fitDistance.py b/preimage/test_fitDistance.py index 4ee26b6..bc941d4 100644 --- a/preimage/test_fitDistance.py +++ b/preimage/test_fitDistance.py @@ -7,7 +7,10 @@ Created on Thu Oct 24 11:50:56 2019 """ from matplotlib import pyplot as plt import numpy as np +from tqdm import tqdm +import sys +sys.path.insert(0, "../") from pygraph.utils.graphfiles import loadDataset from utils import remove_edges from fitDistance import fit_GED_to_kernel_distance @@ -21,21 +24,22 @@ def test_anycosts(): remove_edges(Gn) gkernel = 'marginalizedkernel' itr_max = 10 - edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list = \ - fit_GED_to_kernel_distance(Gn, gkernel, itr_max) + edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \ + nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, gkernel, itr_max) total_time = np.sum(time_list) print('\nedit_costs:', edit_costs) print('\nresidual_list:', residual_list) print('\nedit_cost_list:', edit_cost_list) print('\ndistance matrix in kernel space:', dis_k_mat) print('\nged matrix:', ged_mat) - print('total time:', total_time) + print('\ntotal time:', total_time) + print('\nnb_cost_mat:', nb_cost_mat_list[-1]) np.savez('results/fit_distance.any_costs.gm', edit_costs=edit_costs, residual_list=residual_list, edit_cost_list=edit_cost_list, dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, - total_time=total_time) + total_time=total_time, nb_cost_mat_list=nb_cost_mat_list) - # normalized distance matrices. +# # normalized distance matrices. # gmfile = np.load('results/fit_distance.any_costs.gm.npz') # edit_costs = gmfile['edit_costs'] # residual_list = gmfile['residual_list'] @@ -43,72 +47,256 @@ def test_anycosts(): # dis_k_mat = gmfile['dis_k_mat'] # ged_mat = gmfile['ged_mat'] # total_time = gmfile['total_time'] +## nb_cost_mat_list = gmfile['nb_cost_mat_list'] norm_dis_k_mat = normalize_distance_matrix(dis_k_mat) plt.imshow(norm_dis_k_mat) plt.colorbar() plt.savefig('results/norm_dis_k_mat.any_costs' + '.eps', format='eps', dpi=300) -# plt.savefig('results/norm_dis_k_mat.any_costs' + '.jpg', format='jpg') +# plt.savefig('results/norm_dis_k_mat.any_costs' + '.png', format='png') # plt.show() plt.clf() + norm_ged_mat = normalize_distance_matrix(ged_mat) plt.imshow(norm_ged_mat) plt.colorbar() plt.savefig('results/norm_ged_mat.any_costs' + '.eps', format='eps', dpi=300) -# plt.savefig('results/norm_ged_mat.any_costs' + '.jpg', format='jpg') +# plt.savefig('results/norm_ged_mat.any_costs' + '.png', format='png') +# plt.show() + plt.clf() + + norm_diff = norm_ged_mat - norm_dis_k_mat + plt.imshow(norm_diff) + plt.colorbar() + plt.savefig('results/diff_mat_norm_ged_dis_k.any_costs' + '.eps', format='eps', dpi=300) +# plt.savefig('results/diff_mat_norm_ged_dis_k.any_costs' + '.png', format='png') # plt.show() plt.clf() +# draw_count_bar(norm_diff) def test_cs_leq_ci_plus_cr(): """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er """ - ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', - 'extra_params': {}} # node/edge symb - Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) + ds = {'name': 'monoterpenoides', + 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb + Gn, y_all = loadDataset(ds['dataset']) # Gn = Gn[0:10] - remove_edges(Gn) - gkernel = 'marginalizedkernel' + gkernel = 'untilhpathkernel' + node_label = 'atom' + edge_label = 'bond_type' itr_max = 10 - edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list = \ - 
fit_GED_to_kernel_distance(Gn, gkernel, itr_max) + edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \ + nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label, + gkernel, itr_max, + fitkernel='gaussian') total_time = np.sum(time_list) print('\nedit_costs:', edit_costs) print('\nresidual_list:', residual_list) print('\nedit_cost_list:', edit_cost_list) print('\ndistance matrix in kernel space:', dis_k_mat) print('\nged matrix:', ged_mat) - print('total time:', total_time) - np.savez('results/fit_distance.cs_leq_ci_plus_cr.gm', edit_costs=edit_costs, + print('\ntotal time:', total_time) + print('\nnb_cost_mat:', nb_cost_mat_list[-1]) + np.savez('results/fit_distance.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel.gm', + edit_costs=edit_costs, residual_list=residual_list, edit_cost_list=edit_cost_list, dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, - total_time=total_time) + total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, + coef_dk=coef_dk) + +# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', +# 'extra_params': {}} # node/edge symb +# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +## Gn = Gn[0:10] +## remove_edges(Gn) +# gkernel = 'untilhpathkernel' +# node_label = 'atom' +# edge_label = 'bond_type' +# itr_max = 10 +# edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \ +# nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label, +# gkernel, itr_max) +# total_time = np.sum(time_list) +# print('\nedit_costs:', edit_costs) +# print('\nresidual_list:', residual_list) +# print('\nedit_cost_list:', edit_cost_list) +# print('\ndistance matrix in kernel space:', dis_k_mat) +# print('\nged matrix:', ged_mat) +# print('\ntotal time:', total_time) +# print('\nnb_cost_mat:', nb_cost_mat_list[-1]) +# np.savez('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.mutag.elabeled.uhpkernel.gm', +# edit_costs=edit_costs, +# residual_list=residual_list, edit_cost_list=edit_cost_list, +# dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, +# total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, coef_dk) + + +# # normalized distance matrices. 
+# gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz') +# edit_costs = gmfile['edit_costs'] +# residual_list = gmfile['residual_list'] +# edit_cost_list = gmfile['edit_cost_list'] +# dis_k_mat = gmfile['dis_k_mat'] +# ged_mat = gmfile['ged_mat'] +# total_time = gmfile['total_time'] +# nb_cost_mat_list = gmfile['nb_cost_mat_list'] +# coef_dk = gmfile['coef_dk'] + + nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat) + print(nb_consistent, nb_inconsistent, ratio_consistent) + +# dis_k_sub = pairwise_substitution(dis_k_mat) +# ged_sub = pairwise_substitution(ged_mat) +# np.savez('results/sub_dis_mat.cs_leq_ci_plus_cr.cost_leq_1en2.gm', +# dis_k_sub=dis_k_sub, ged_sub=ged_sub) + + + norm_dis_k_mat = normalize_distance_matrix(dis_k_mat) + plt.imshow(norm_dis_k_mat) + plt.colorbar() + plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' + + '.eps', format='eps', dpi=300) + plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' + + '.png', format='png') +# plt.show() + plt.clf() + + norm_ged_mat = normalize_distance_matrix(ged_mat) + plt.imshow(norm_ged_mat) + plt.colorbar() + plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' + + '.eps', format='eps', dpi=300) + plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' + + '.png', format='png') +# plt.show() + plt.clf() + + norm_diff = norm_ged_mat - norm_dis_k_mat + plt.imshow(norm_diff) + plt.colorbar() + plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' + + '.eps', format='eps', dpi=300) + plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' + + '.png', format='png') +# plt.show() + plt.clf() +# draw_count_bar(norm_diff) + + +def test_unfitted(): + """unfitted. + """ + from fitDistance import compute_geds + from utils import kernel_distance_matrix + ds = {'name': 'monoterpenoides', + 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb + Gn, y_all = loadDataset(ds['dataset']) +# Gn = Gn[0:10] + gkernel = 'untilhpathkernel' + node_label = 'atom' + edge_label = 'bond_type' + + +# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', +# 'extra_params': {}} # node/edge symb +# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +## Gn = Gn[0:10] +## remove_edges(Gn) +# gkernel = 'marginalizedkernel' + + dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, gkernel=gkernel) + ged_all, ged_mat, n_edit_operations = compute_geds(Gn, [3, 3, 1, 3, 3, 1], + [0, 1, 2, 3, 4, 5], parallel=True) + print('\ndistance matrix in kernel space:', dis_k_mat) + print('\nged matrix:', ged_mat) +# np.savez('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.gm', edit_costs=edit_costs, +# residual_list=residual_list, edit_cost_list=edit_cost_list, +# dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, +# total_time=total_time, nb_cost_mat_list=nb_cost_mat_list) # normalized distance matrices. 
-#    gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.gm.npz')
+#    gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en3.gm.npz')
 #    edit_costs = gmfile['edit_costs']
 #    residual_list = gmfile['residual_list']
 #    edit_cost_list = gmfile['edit_cost_list']
 #    dis_k_mat = gmfile['dis_k_mat']
 #    ged_mat = gmfile['ged_mat']
 #    total_time = gmfile['total_time']
+#    nb_cost_mat_list = gmfile['nb_cost_mat_list']
+    
+    nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
+    print(nb_consistent, nb_inconsistent, ratio_consistent)
     
     norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
     plt.imshow(norm_dis_k_mat)
     plt.colorbar()
-    plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr' + '.eps', format='eps', dpi=300)
-#    plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr' + '.jpg', format='jpg')
+    plt.savefig('results/norm_dis_k_mat.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
+    plt.savefig('results/norm_dis_k_mat.unfitted.MUTAG' + '.png', format='png')
 #    plt.show()
     plt.clf()
+    
     norm_ged_mat = normalize_distance_matrix(ged_mat)
     plt.imshow(norm_ged_mat)
     plt.colorbar()
-    plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr' + '.eps', format='eps', dpi=300)
-#    plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr' + '.jpg', format='jpg')
+    plt.savefig('results/norm_ged_mat.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
+    plt.savefig('results/norm_ged_mat.unfitted.MUTAG' + '.png', format='png')
+#    plt.show()
+    plt.clf()
+    
+    norm_diff = norm_ged_mat - norm_dis_k_mat
+    plt.imshow(norm_diff)
+    plt.colorbar()
+    plt.savefig('results/diff_mat_norm_ged_dis_k.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
+    plt.savefig('results/diff_mat_norm_ged_dis_k.unfitted.MUTAG' + '.png', format='png')
 #    plt.show()
     plt.clf()
+    draw_count_bar(norm_diff)
+    
+
+def pairwise_substitution_consistence(mat1, mat2):
+    """Count how often the pairwise orderings of two symmetric distance matrices agree.
+    """
+    nb_consistent = 0
+    nb_inconsistent = 0
+    # the matrices are considered symmetric.
+    upper_tri1 = mat1[np.triu_indices_from(mat1)]
+    upper_tri2 = mat2[np.triu_indices_from(mat2)]
+    for i in tqdm(range(len(upper_tri1)), desc='computing consistency', file=sys.stdout):
+        for j in range(i, len(upper_tri1)):
+            if np.sign(upper_tri1[i] - upper_tri1[j]) == np.sign(upper_tri2[i] - upper_tri2[j]):
+                nb_consistent += 1
+            else:
+                nb_inconsistent += 1
+    return nb_consistent, nb_inconsistent, nb_consistent / (nb_consistent + nb_inconsistent)
+
+
+def pairwise_substitution(mat):
+    # the matrix is considered symmetric.
+ upper_tri = mat[np.triu_indices_from(mat)] + sub_list = [] + for i in tqdm(range(len(upper_tri)), desc='computing', file=sys.stdout): + for j in range(i, len(upper_tri)): + sub_list.append(upper_tri[i] - upper_tri[j]) + return sub_list + + +def draw_count_bar(norm_diff): + import pandas + from collections import Counter, OrderedDict + norm_diff_cnt = norm_diff.flatten() + norm_diff_cnt = norm_diff_cnt * 10 + norm_diff_cnt = np.floor(norm_diff_cnt) + norm_diff_cnt = Counter(norm_diff_cnt) + norm_diff_cnt = OrderedDict(sorted(norm_diff_cnt.items())) + df = pandas.DataFrame.from_dict(norm_diff_cnt, orient='index') + df.plot(kind='bar') if __name__ == '__main__': - test_anycosts() - test_cs_leq_ci_plus_cr() \ No newline at end of file +# test_anycosts() +# test_cs_leq_ci_plus_cr() + test_unfitted() + +# x = np.array([[1,2,3],[4,5,6],[7,8,9]]) +# xx = pairwise_substitution(x) \ No newline at end of file diff --git a/preimage/test_iam.py b/preimage/test_iam.py index ee51e4b..82d5446 100644 --- a/preimage/test_iam.py +++ b/preimage/test_iam.py @@ -17,9 +17,363 @@ import random import sys sys.path.insert(0, "../") from pygraph.utils.graphfiles import loadDataset +#from pygraph.utils.logger2file import * from iam import iam_upgraded -from utils import remove_edges, compute_kernel, get_same_item_indices -from ged import ged_median +from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar +#from ged import ged_median + +def test_iam_monoterpenoides(): + ds = {'name': 'monoterpenoides', + 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb + Gn, y_all = loadDataset(ds['dataset']) +# Gn = Gn[0:50] + gkernel = 'untilhpathkernel' + node_label = 'atom' + edge_label = 'bond_type' + + # parameters for GED function from the IAM paper. + # fitted edit costs (Gaussian). + c_vi = 0.03620133402089074 + c_vr = 0.0417574590207099 + c_vs = 0.009992282328587499 + c_ei = 0.08293120042342755 + c_er = 0.09512220476358019 + c_es = 0.09222529696841467 +# # fitted edit costs (linear combinations). +# c_vi = 0.1749684054238749 +# c_vr = 0.0734054228711457 +# c_vs = 0.05017781726016715 +# c_ei = 0.1869431164806936 +# c_er = 0.32055856948274 +# c_es = 0.2569469379247611 +# # unfitted edit costs. +# c_vi = 3 +# c_vr = 3 +# c_vs = 1 +# c_ei = 3 +# c_er = 3 +# c_es = 1 + ite_max_iam = 50 + epsilon_iam = 0.001 + removeNodes = False + connected_iam = False + # parameters for IAM function +# ged_cost = 'CONSTANT' + ged_cost = 'CONSTANT' + ged_method = 'IPFP' + edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] +# edit_cost_constant = [] + ged_stabilizer = 'min' + ged_repeat = 50 + params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, + 'edit_cost_constant': edit_cost_constant, + 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} + + # classify graphs according to letters. 
+ time_list = [] + dis_ks_min_list = [] + dis_ks_set_median_list = [] + sod_gs_list = [] + g_best = [] + sod_set_median_list = [] + sod_list_list = [] + idx_dict = get_same_item_indices(y_all) + for y_class in idx_dict: + print('\n-------------------------------------------------------') + print('class of y:', y_class) + Gn_class = [Gn[i].copy() for i in idx_dict[y_class]] + + time_list.append([]) + dis_ks_min_list.append([]) + dis_ks_set_median_list.append([]) + sod_gs_list.append([]) + g_best.append([]) + sod_set_median_list.append([]) + + for repeat in range(50): + idx_rdm = random.sample(range(len(Gn_class)), 10) + print('graphs chosen:', idx_rdm) + Gn_median = [Gn_class[idx].copy() for idx in idx_rdm] + Gn_candidate = [g.copy() for g in Gn_median] + + alpha_range = [1 / len(Gn_median)] * len(Gn_median) + time0 = time.time() + G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \ + = iam_upgraded(Gn_median, + Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, + epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, + params_ged=params_ged) + time_total = time.time() - time0 + print('\ntime: ', time_total) + time_list[-1].append(time_total) + g_best[-1].append(G_gen_median_list[0]) + sod_set_median_list[-1].append(sod_set_median) + print('\nsmallest sod of the set median:', sod_set_median) + sod_gs_list[-1].append(sod_gen_median) + print('\nsmallest sod in graph space:', sod_gen_median) + sod_list_list.append(sod_list) + + # show the best graph and save it to file. + print('one of the possible corresponding pre-images is') + nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), + with_labels=True) +# plt.show() + # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + +# plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) + +# '_repeat' + str(repeat) + '_' + str(time.time()) + +# '.png', format="PNG") + plt.clf() + # print(G_gen_median_list[0].nodes(data=True)) + # print(G_gen_median_list[0].edges(data=True)) + + + # compute distance between \psi and the set median graph. + knew_set_median = compute_kernel(G_set_median_list + Gn_median, + gkernel, node_label, edge_label, False) + dhat_new_set_median_list = [] + for idx, g_tmp in enumerate(G_set_median_list): + # @todo: the term3 below could use the one at the beginning of the function. + dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list), + len(G_set_median_list) + len(Gn_median) + 1), + alpha_range, knew_set_median, withterm3=False)) + + print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0]) + dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0]) + + + # compute distance between \psi and the new generated graphs. + knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label, + edge_label, False) + dhat_new_list = [] + for idx, g_tmp in enumerate(G_gen_median_list): + # @todo: the term3 below could use the one at the beginning of the function. 
+ dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list), + len(G_gen_median_list) + len(Gn_median) + 1), + alpha_range, knew, withterm3=False)) + + print('\nsmallest distance in kernel space: ', dhat_new_list[0]) + dis_ks_min_list[-1].append(dhat_new_list[0]) + + + print('\nsods of the set median for this class:', sod_set_median_list[-1]) + print('\nsods in graph space for this class:', sod_gs_list[-1]) + print('\ndistance in kernel space of set median for this class:', + dis_ks_set_median_list[-1]) + print('\nsmallest distances in kernel space for this class:', + dis_ks_min_list[-1]) + print('\ntimes for this class:', time_list[-1]) + + sod_set_median_list[-1] = np.mean(sod_set_median_list[-1]) + sod_gs_list[-1] = np.mean(sod_gs_list[-1]) + dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1]) + dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1]) + time_list[-1] = np.mean(time_list[-1]) + + print() + print('\nmean sods of the set median for each class:', sod_set_median_list) + print('\nmean sods in graph space for each class:', sod_gs_list) + print('\ndistances in kernel space of set median for each class:', + dis_ks_set_median_list) + print('\nmean smallest distances in kernel space for each class:', + dis_ks_min_list) + print('\nmean times for each class:', time_list) + + print('\nmean sods of the set median of all:', np.mean(sod_set_median_list)) + print('\nmean sods in graph space of all:', np.mean(sod_gs_list)) + print('\nmean distances in kernel space of set median of all:', + np.mean(dis_ks_set_median_list)) + print('\nmean smallest distances in kernel space of all:', + np.mean(dis_ks_min_list)) + print('\nmean times of all:', np.mean(time_list)) + + nb_better_sods = 0 + nb_worse_sods = 0 + nb_same_sods = 0 + for sods in sod_list_list: + if sods[0] > sods[-1]: + nb_better_sods += 1 + elif sods[0] < sods[-1]: + nb_worse_sods += 1 + else: + nb_same_sods += 1 + print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods), + 'are getting better,', str(nb_worse_sods), 'are getting worse,', + str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)), + 'sods are improved.') + + +def test_iam_mutag(): + ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', + 'extra_params': {}} # node/edge symb + Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +# Gn = Gn[0:50] + gkernel = 'untilhpathkernel' + node_label = 'atom' + edge_label = 'bond_type' + + # parameters for GED function from the IAM paper. + # fitted edit costs. + c_vi = 0.03523843108436513 + c_vr = 0.03347339739350128 + c_vs = 0.06871290673612238 + c_ei = 0.08591999846720685 + c_er = 0.07962086440894103 + c_es = 0.08596855855478233 + # unfitted edit costs. +# c_vi = 3 +# c_vr = 3 +# c_vs = 1 +# c_ei = 3 +# c_er = 3 +# c_es = 1 + ite_max_iam = 50 + epsilon_iam = 0.001 + removeNodes = False + connected_iam = False + # parameters for IAM function +# ged_cost = 'CONSTANT' + ged_cost = 'CONSTANT' + ged_method = 'IPFP' + edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] +# edit_cost_constant = [] + ged_stabilizer = 'min' + ged_repeat = 50 + params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, + 'edit_cost_constant': edit_cost_constant, + 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} + + # classify graphs according to letters. 
+ time_list = [] + dis_ks_min_list = [] + dis_ks_set_median_list = [] + sod_gs_list = [] + g_best = [] + sod_set_median_list = [] + sod_list_list = [] + idx_dict = get_same_item_indices(y_all) + for y_class in idx_dict: + print('\n-------------------------------------------------------') + print('class of y:', y_class) + Gn_class = [Gn[i].copy() for i in idx_dict[y_class]] + + time_list.append([]) + dis_ks_min_list.append([]) + dis_ks_set_median_list.append([]) + sod_gs_list.append([]) + g_best.append([]) + sod_set_median_list.append([]) + + for repeat in range(50): + idx_rdm = random.sample(range(len(Gn_class)), 10) + print('graphs chosen:', idx_rdm) + Gn_median = [Gn_class[idx].copy() for idx in idx_rdm] + Gn_candidate = [g.copy() for g in Gn_median] + + alpha_range = [1 / len(Gn_median)] * len(Gn_median) + time0 = time.time() + G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \ + = iam_upgraded(Gn_median, + Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, + epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, + params_ged=params_ged) + time_total = time.time() - time0 + print('\ntime: ', time_total) + time_list[-1].append(time_total) + g_best[-1].append(G_gen_median_list[0]) + sod_set_median_list[-1].append(sod_set_median) + print('\nsmallest sod of the set median:', sod_set_median) + sod_gs_list[-1].append(sod_gen_median) + print('\nsmallest sod in graph space:', sod_gen_median) + sod_list_list.append(sod_list) + + # show the best graph and save it to file. + print('one of the possible corresponding pre-images is') + nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), + with_labels=True) +# plt.show() + # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + +# plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) + +# '_repeat' + str(repeat) + '_' + str(time.time()) + +# '.png', format="PNG") + plt.clf() + # print(G_gen_median_list[0].nodes(data=True)) + # print(G_gen_median_list[0].edges(data=True)) + + + # compute distance between \psi and the set median graph. + knew_set_median = compute_kernel(G_set_median_list + Gn_median, + gkernel, node_label, edge_label, False) + dhat_new_set_median_list = [] + for idx, g_tmp in enumerate(G_set_median_list): + # @todo: the term3 below could use the one at the beginning of the function. + dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list), + len(G_set_median_list) + len(Gn_median) + 1), + alpha_range, knew_set_median, withterm3=False)) + + print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0]) + dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0]) + + + # compute distance between \psi and the new generated graphs. + knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label, + edge_label, False) + dhat_new_list = [] + for idx, g_tmp in enumerate(G_gen_median_list): + # @todo: the term3 below could use the one at the beginning of the function. 
+ dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list), + len(G_gen_median_list) + len(Gn_median) + 1), + alpha_range, knew, withterm3=False)) + + print('\nsmallest distance in kernel space: ', dhat_new_list[0]) + dis_ks_min_list[-1].append(dhat_new_list[0]) + + + print('\nsods of the set median for this class:', sod_set_median_list[-1]) + print('\nsods in graph space for this class:', sod_gs_list[-1]) + print('\ndistance in kernel space of set median for this class:', + dis_ks_set_median_list[-1]) + print('\nsmallest distances in kernel space for this class:', + dis_ks_min_list[-1]) + print('\ntimes for this class:', time_list[-1]) + + sod_set_median_list[-1] = np.mean(sod_set_median_list[-1]) + sod_gs_list[-1] = np.mean(sod_gs_list[-1]) + dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1]) + dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1]) + time_list[-1] = np.mean(time_list[-1]) + + print() + print('\nmean sods of the set median for each class:', sod_set_median_list) + print('\nmean sods in graph space for each class:', sod_gs_list) + print('\ndistances in kernel space of set median for each class:', + dis_ks_set_median_list) + print('\nmean smallest distances in kernel space for each class:', + dis_ks_min_list) + print('\nmean times for each class:', time_list) + + print('\nmean sods of the set median of all:', np.mean(sod_set_median_list)) + print('\nmean sods in graph space of all:', np.mean(sod_gs_list)) + print('\nmean distances in kernel space of set median of all:', + np.mean(dis_ks_set_median_list)) + print('\nmean smallest distances in kernel space of all:', + np.mean(dis_ks_min_list)) + print('\nmean times of all:', np.mean(time_list)) + + nb_better_sods = 0 + nb_worse_sods = 0 + nb_same_sods = 0 + for sods in sod_list_list: + if sods[0] > sods[-1]: + nb_better_sods += 1 + elif sods[0] < sods[-1]: + nb_worse_sods += 1 + else: + nb_same_sods += 1 + print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods), + 'are getting better,', str(nb_worse_sods), 'are getting worse,', + str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)), + 'sods are improved.') + ############################################################################### # tests on different numbers of median-sets. @@ -33,46 +387,352 @@ def test_iam_median_nb(): remove_edges(Gn) gkernel = 'marginalizedkernel' -# lmbda = 0.03 # termination probalility -# r_max = 10 # iteration limit for pre-image. 
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-#    k = 5 # k nearest neighbors
-#    epsilon = 1e-6
-#    InitIAMWithAllDk = True
+    lmbda = 0.03 # termination probability
+#    # parameters for GED function
+#    c_vi = 0.037
+#    c_vr = 0.038
+#    c_vs = 0.075
+#    c_ei = 0.001
+#    c_er = 0.001
+#    c_es = 0.0
+#    ite_max_iam = 50
+#    epsilon_iam = 0.001
+#    removeNodes = False
+#    connected_iam = False
+#    # parameters for IAM function
+#    ged_cost = 'CONSTANT'
+#    ged_method = 'IPFP'
+#    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
+#    ged_stabilizer = 'min'
+#    ged_repeat = 50
+#    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
+#                  'edit_cost_constant': edit_cost_constant,
+#                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
+    
     # parameters for GED function
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    # parameters for IAM function
-    c_ei=1
-    c_er=1
-    c_es=1
+    c_vi = 4
+    c_vr = 4
+    c_vs = 2
+    c_ei = 1
+    c_er = 1
+    c_es = 1
     ite_max_iam = 50
     epsilon_iam = 0.001
     removeNodes = False
     connected_iam = False
-    
-    # number of graphs; we what to compute the median of these graphs.
-    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
+    # parameters for IAM function
+    ged_cost = 'CHEM_1'
+    ged_method = 'IPFP'
+    edit_cost_constant = []
+    ged_stabilizer = 'min'
+    ged_repeat = 50
+    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
+                  'edit_cost_constant': edit_cost_constant,
+                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
     
     # find out all the graphs classified to positive group 1.
     idx_dict = get_same_item_indices(y_all)
     Gn = [Gn[i] for i in idx_dict[1]]
     
+    # number of graphs; we want to compute the median of these graphs.
+#    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
+    nb_median_range = [len(Gn)]
+    
 #    # compute Gram matrix.
 #    time0 = time.time()
 #    km = compute_kernel(Gn, gkernel, True)
 #    time_km = time.time() - time0
 #    # write Gram matrix to file.
 #    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
+    
+    time_list = []
+    dis_ks_min_list = []
+    sod_gs_list = []
+#    sod_gs_min_list = []
+#    nb_updated_list = []
+#    nb_updated_k_list = []
+    g_best = []
+    for nb_median in nb_median_range:
+        print('\n-------------------------------------------------------')
+        print('number of median graphs =', nb_median)
+        random.seed(1)
+        idx_rdm = random.sample(range(len(Gn)), nb_median)
+        print('graphs chosen:', idx_rdm)
+        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
+        Gn_candidate = [g.copy() for g in Gn]
+        
+#        for g in Gn_median:
+#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
+##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
+#            plt.show()
+#            plt.clf()
+        
+        ###################################################################
+#        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
+#        km_tmp = gmfile['gm']
+#        time_km = gmfile['gmtime']
+#        # modify mixed gram matrix.
+#        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
+#        for i in range(len(Gn)):
+#            for j in range(i, len(Gn)):
+#                km[i, j] = km_tmp[i, j]
+#                km[j, i] = km[i, j]
+#        for i in range(len(Gn)):
+#            for j, idx in enumerate(idx_rdm):
+#                km[i, len(Gn) + j] = km[i, idx]
+#                km[len(Gn) + j, i] = km[i, idx]
+#        for i, idx1 in enumerate(idx_rdm):
+#            for j, idx2 in enumerate(idx_rdm):
+#                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
+        
+        ###################################################################
+        alpha_range = [1 / nb_median] * nb_median
+        time0 = time.time()
+        ghat_new_list, sod_min, _, _, _ = iam_upgraded(Gn_median, Gn_candidate,
+            c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
+            epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
+            params_ged=params_ged)
+        
+        time_total = time.time() - time0
+        print('\ntime: ', time_total)
+        time_list.append(time_total)
+        
+        # compute distance between \psi and the new generated graphs.
+        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
+        dhat_new_list = []
+        for idx, g_tmp in enumerate(ghat_new_list):
+            # @todo: the term3 below could use the one at the beginning of the function.
+            dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
+                len(ghat_new_list) + len(Gn_median) + 1),
+                alpha_range, knew, withterm3=False))
+        
+        print('\nsmallest distance in kernel space: ', dhat_new_list[0])
+        dis_ks_min_list.append(dhat_new_list[0])
+        g_best.append(ghat_new_list[0])
+        
+        # show the best graph and save it to file.
+#        print('the shortest distance is', dhat)
+        print('one of the possible corresponding pre-images is')
+        nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
+                with_labels=True)
+        plt.show()
+#        plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
+        plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
+                    '.png', format="PNG")
+        plt.clf()
+#        print(ghat_list[0].nodes(data=True))
+#        print(ghat_list[0].edges(data=True))
+        
+        sod_gs_list.append(sod_min)
+#        sod_gs_min_list.append(np.min(sod_min))
+        print('\nsmallest sod in graph space: ', sod_min)
+        
+    print('\nsods in graph space: ', sod_gs_list)
+#    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
+    print('\nsmallest distance in kernel space for each set of median graphs: ',
+          dis_ks_min_list)
+#    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
+#          nb_updated_list)
+#    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
+#          nb_updated_k_list)
+    print('\ntimes:', time_list)
+    
+
+def test_iam_letter_h():
+    from median import draw_Letter_graph
+    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
+          'extra_params': {}} # node nsymb
+#    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
+#          'extra_params': {}} # node nsymb
+#    Gn = Gn[0:50]
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+    gkernel = 'structuralspkernel'
+    
+    # parameters for GED function from the IAM paper.
+    c_vi = 3
+    c_vr = 3
+    c_vs = 1
+    c_ei = 3
+    c_er = 3
+    c_es = 1
+    ite_max_iam = 50
+    epsilon_iam = 0.001
+    removeNodes = False
+    connected_iam = False
+    # parameters for IAM function
+#    ged_cost = 'CONSTANT'
+    ged_cost = 'LETTER'
+    ged_method = 'IPFP'
+#    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
+    edit_cost_constant = []
+    ged_stabilizer = 'min'
+    ged_repeat = 50
+    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
+                  'edit_cost_constant': edit_cost_constant,
+                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
+    
+    # classify graphs according to letters.
+    time_list = []
+    dis_ks_min_list = []
+    sod_gs_list = []
+    g_best = []
+    sod_set_median_list = []
+    idx_dict = get_same_item_indices(y_all)
+    for letter in idx_dict:
+        print('\n-------------------------------------------------------')
+        print('letter', letter)
+        Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
+        
+        time_list.append([])
+        dis_ks_min_list.append([])
+        sod_gs_list.append([])
+        g_best.append([])
+        sod_set_median_list.append([])
+        
+        for repeat in range(50):
+            idx_rdm = random.sample(range(len(Gn_let)), 50)
+            print('graphs chosen:', idx_rdm)
+            Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
+            Gn_candidate = [g.copy() for g in Gn_median]
+            
+            alpha_range = [1 / len(Gn_median)] * len(Gn_median)
+            time0 = time.time()
+            ghat_new_list, sod_min, _, _, sod_set_median = iam_upgraded(Gn_median,
+                Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
+                epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
+                params_ged=params_ged)
+            time_total = time.time() - time0
+            print('\ntime: ', time_total)
+            time_list[-1].append(time_total)
+            g_best[-1].append(ghat_new_list[0])
+            sod_set_median_list[-1].append(sod_set_median)
+            print('\nsmallest sod of the set median:', sod_set_median)
+            sod_gs_list[-1].append(sod_min)
+            print('\nsmallest sod in graph space:', sod_min)
+            
+            # show the best graph and save it to file.
+            print('one of the possible corresponding pre-images is')
+            draw_Letter_graph(ghat_new_list[0], savepath='results/iam/paper_compare/')
+            
+            # compute distance between \psi and the new generated graphs.
+            knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
+            dhat_new_list = []
+            for idx, g_tmp in enumerate(ghat_new_list):
+                # @todo: the term3 below could use the one at the beginning of the function.
+ dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), + len(ghat_new_list) + len(Gn_median) + 1), + alpha_range, knew, withterm3=False)) + + print('\nsmallest distance in kernel space: ', dhat_new_list[0]) + dis_ks_min_list[-1].append(dhat_new_list[0]) + + print('\nsods of the set median for this letter:', sod_set_median_list[-1]) + print('\nsods in graph space for this letter:', sod_gs_list[-1]) + print('\nsmallest distances in kernel space for this letter:', + dis_ks_min_list[-1]) + print('\ntimes for this letter:', time_list[-1]) + + sod_set_median_list[-1] = np.mean(sod_set_median_list[-1]) + sod_gs_list[-1] = np.mean(sod_gs_list[-1]) + dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1]) + time_list[-1] = np.mean(time_list[-1]) + print('\nmean sods of the set median for each letter:', sod_set_median_list) + print('\nmean sods in graph space for each letter:', sod_gs_list) + print('\nmean smallest distances in kernel space for each letter:', + dis_ks_min_list) + print('\nmean times for each letter:', time_list) + + print('\nmean sods of the set median of all:', np.mean(sod_set_median_list)) + print('\nmean sods in graph space of all:', np.mean(sod_gs_list)) + print('\nmean smallest distances in kernel space of all:', + np.mean(dis_ks_min_list)) + print('\nmean times of all:', np.mean(time_list)) + + + + + + + + + +def test_iam_fitdistance(): + + ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', + 'extra_params': {}} # node/edge symb + Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +# Gn = Gn[0:50] +# remove_edges(Gn) + gkernel = 'marginalizedkernel' + node_label = 'atom' + edge_label = 'bond_type' + +# lmbda = 0.03 # termination probalility +# # parameters for GED function +# c_vi = 0.037 +# c_vr = 0.038 +# c_vs = 0.075 +# c_ei = 0.001 +# c_er = 0.001 +# c_es = 0.0 +# ite_max_iam = 50 +# epsilon_iam = 0.001 +# removeNodes = False +# connected_iam = False +# # parameters for IAM function +# ged_cost = 'CONSTANT' +# ged_method = 'IPFP' +# edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] +# ged_stabilizer = 'min' +# ged_repeat = 50 +# params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, +# 'edit_cost_constant': edit_cost_constant, +# 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} + + # parameters for GED function + c_vi = 4 + c_vr = 4 + c_vs = 2 + c_ei = 1 + c_er = 1 + c_es = 1 + ite_max_iam = 50 + epsilon_iam = 0.001 + removeNodes = False + connected_iam = False + # parameters for IAM function + ged_cost = 'CHEM_1' + ged_method = 'IPFP' + edit_cost_constant = [] + ged_stabilizer = 'min' + ged_repeat = 50 + params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, + 'edit_cost_constant': edit_cost_constant, + 'stabilizer': ged_stabilizer, 'repeat': ged_repeat} + + # find out all the graphs classified to positive group 1. + idx_dict = get_same_item_indices(y_all) + Gn = [Gn[i] for i in idx_dict[1]] + + # number of graphs; we what to compute the median of these graphs. +# nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] + nb_median_range = [10] + +# # compute Gram matrix. +# time0 = time.time() +# km = compute_kernel(Gn, gkernel, True) +# time_km = time.time() - time0 +# # write Gram matrix to file. 
+# # compute Gram matrix.
+# time0 = time.time()
+# km = compute_kernel(Gn, gkernel, node_label, edge_label, True)
+# time_km = time.time() - time0
+# # write Gram matrix to file.
+# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
 time_list = []
 dis_ks_min_list = []
+ dis_ks_gen_median_list = []
 sod_gs_list = []
- sod_gs_min_list = []
- nb_updated_list = []
- nb_updated_k_list = []
+# sod_gs_min_list = []
+# nb_updated_list = []
+# nb_updated_k_list = []
 g_best = []
 for nb_median in nb_median_range:
 print('\n-------------------------------------------------------')
@@ -90,72 +750,80 @@ def test_iam_median_nb():
# plt.clf()
 ###################################################################
- gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
- km_tmp = gmfile['gm']
- time_km = gmfile['gmtime']
- # modify mixed gram matrix.
- km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- km[i, j] = km_tmp[i, j]
- km[j, i] = km[i, j]
- for i in range(len(Gn)):
- for j, idx in enumerate(idx_rdm):
- km[i, len(Gn) + j] = km[i, idx]
- km[len(Gn) + j, i] = km[i, idx]
- for i, idx1 in enumerate(idx_rdm):
- for j, idx2 in enumerate(idx_rdm):
- km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
+# gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
+# km_tmp = gmfile['gm']
+# time_km = gmfile['gmtime']
+# # modify mixed gram matrix.
+# km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
+# for i in range(len(Gn)):
+# for j in range(i, len(Gn)):
+# km[i, j] = km_tmp[i, j]
+# km[j, i] = km[i, j]
+# for i in range(len(Gn)):
+# for j, idx in enumerate(idx_rdm):
+# km[i, len(Gn) + j] = km[i, idx]
+# km[len(Gn) + j, i] = km[i, idx]
+# for i, idx1 in enumerate(idx_rdm):
+# for j, idx2 in enumerate(idx_rdm):
+# km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
 ###################################################################
 alpha_range = [1 / nb_median] * nb_median
 time0 = time.time()
- ghat_new_list, dis_min = iam_upgraded(Gn_median, Gn_candidate,
- c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
- epsilon=epsilon_iam, removeNodes=removeNodes,
- connected=connected_iam,
- params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
- 'saveGXL': saveGXL})
+ G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
+ = iam_upgraded(Gn_median, Gn_candidate,
+ c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
+ epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
+ params_ged=params_ged)
 time_total = time.time() - time0
 print('\ntime: ', time_total)
 time_list.append(time_total)
- print('\nsmallest distance in kernel space: ', dhat)
- dis_ks_min_list.append(dhat)
- g_best.append(ghat_list)
- print('\nnumber of updates of the best graph: ', nb_updated)
- nb_updated_list.append(nb_updated)
- print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
- nb_updated_k_list.append(nb_updated_k)
+
+ # compute distance between \psi and the new generated graphs.
+ knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
+ edge_label, False)
+ dhat_new_list = []
+ for idx, g_tmp in enumerate(G_gen_median_list):
+ # @todo: the term3 below could use the one at the beginning of the function.
+ dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
+ len(G_gen_median_list) + len(Gn_median) + 1),
+ alpha_range, knew, withterm3=False))
+
+ print('\nsmallest distance in kernel space: ', dhat_new_list[0])
+ dis_ks_min_list.append(dhat_new_list[0])
+ g_best.append(G_gen_median_list[0])
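The block commented out above splices kernel values for the nb_median sampled graphs onto a Gram matrix precomputed over Gn, rather than recomputing any kernels; since the sampled graphs are copies of graphs in Gn, every needed entry already exists. A compact sketch of the same idea (build_mixed_gram is an illustrative name, not part of the patch):

import numpy as np

def build_mixed_gram(km_all, idx_rdm):
    # km_all: precomputed Gram matrix over the full graph list Gn.
    # idx_rdm: indices (within Gn) of the sampled median graphs.
    n = km_all.shape[0]
    m = len(idx_rdm)
    km = np.zeros((n + m, n + m))
    km[:n, :n] = km_all                            # original block
    km[:n, n:] = km_all[:, idx_rdm]                # Gn vs. sampled graphs
    km[n:, :n] = km_all[idx_rdm, :]                # sampled graphs vs. Gn
    km[n:, n:] = km_all[np.ix_(idx_rdm, idx_rdm)]  # sampled vs. sampled
    return km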
 # show the best graph and save it to file.
- print('the shortest distance is', dhat)
+# print('the shortest distance is', dhat)
 print('one of the possible corresponding pre-images is')
- nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
+ nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
 with_labels=True)
 plt.show()
- plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
- '.png', format="PNG")
+# plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
+# plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
+# '.png', format="PNG")
 plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
- # compute the corresponding sod in graph space.
- sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
- sod_gs_list.append(sod_tmp)
- sod_gs_min_list.append(np.min(sod_tmp))
- print('\nsmallest sod in graph space: ', np.min(sod_tmp))
+ sod_gs_list.append(sod_gen_median)
+# sod_gs_min_list.append(np.min(sod_gen_median))
+ print('\nsmallest sod in graph space: ', sod_gen_median)
+ print('\nsmallest sod of set median in graph space: ', sod_set_median)
 print('\nsods in graph space: ', sod_gs_list)
- print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
+# print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
 print('\nsmallest distance in kernel space for each set of median graphs: ',
 dis_ks_min_list)
- print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
- nb_updated_list)
- print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
- nb_updated_k_list)
+# print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
+# nb_updated_list)
+# print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
+# nb_updated_k_list)
 print('\ntimes:', time_list)
+
+
+
 ###############################################################################
@@ -164,4 +832,11 @@ def test_iam_median_nb():
 if __name__ == '__main__':
 ###############################################################################
 # tests on different numbers of median-sets.
- test_iam_median_nb()
\ No newline at end of file
+# test_iam_median_nb()
+# test_iam_letter_h()
+ test_iam_monoterpenoides()
+# test_iam_mutag()
+
+# test_iam_fitdistance()
+# print("test log")
+
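Throughout this commit the ad-hoc (ged_cost, ged_method, saveGXL) arguments are replaced by a single params_ged dict whose keys mirror the keyword arguments of GED in preimage/ged.py, so a receiver can unpack it directly onto the call. A minimal sketch (whether ged_median forwards the dict exactly this way is not shown in the patch; the cost values are the ones set in the next hunk):

from ged import GED

def ged_with_params(g1, g2, params_ged):
    # params_ged keys match GED's keyword arguments (lib, cost, method,
    # edit_cost_constant, stabilizer, repeat), so the dict unpacks directly.
    return GED(g1, g2, **params_ged)

params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
              'edit_cost_constant': [4, 4, 2, 1, 1, 1],
              'stabilizer': 'min', 'repeat': 50}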
diff --git a/preimage/test_preimage_iam.py b/preimage/test_preimage_iam.py
index 936ce35..34973fd 100644
--- a/preimage/test_preimage_iam.py
+++ b/preimage/test_preimage_iam.py
@@ -192,26 +192,42 @@ def test_preimage_iam_median_nb():
 gkernel = 'marginalizedkernel'
 lmbda = 0.03 # termination probability
- r_max = 10 # iteration limit for pre-image.
+ r_max = 3 # iteration limit for pre-image.
# alpha_range = np.linspace(0.5, 0.5, 1)
 k = 5 # k nearest neighbors
 epsilon = 1e-6
 InitIAMWithAllDk = True
- # parameters for GED function
- ged_cost='CHEM_1'
- ged_method='IPFP'
- saveGXL='gedlib'
 # parameters for IAM function
- c_ei=1
- c_er=1
- c_es=1
+# c_vi = 0.037
+# c_vr = 0.038
+# c_vs = 0.075
+# c_ei = 0.001
+# c_er = 0.001
+# c_es = 0.0
+ c_vi = 4
+ c_vr = 4
+ c_vs = 2
+ c_ei = 1
+ c_er = 1
+ c_es = 1
 ite_max_iam = 50
 epsilon_iam = 0.001
 removeNodes = True
 connected_iam = False
+ # parameters for GED function
+# ged_cost='CHEM_1'
+ ged_cost = 'CONSTANT'
+ ged_method = 'IPFP'
+ edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
+ ged_stabilizer = 'min'
+ ged_repeat = 50
+ params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
+ 'edit_cost_constant': edit_cost_constant,
+ 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
 # number of graphs; we want to compute the median of these graphs.
- nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
+# nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
+ nb_median_range = [2]
 # find all the graphs classified into positive group 1.
 idx_dict = get_same_item_indices(y_all)
@@ -274,8 +290,7 @@ def test_preimage_iam_median_nb():
 params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
 'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
 'removeNodes': removeNodes, 'connected': connected_iam},
- params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
- 'saveGXL': saveGXL})
+ params_ged=params_ged)
 time_total = time.time() - time0 + time_km
 print('\ntime: ', time_total)
@@ -293,16 +308,15 @@ def test_preimage_iam_median_nb():
 print('one of the possible corresponding pre-images is')
 nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
 with_labels=True)
-# plt.show()
- plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
- '.png', format="PNG")
+ plt.show()
+# plt.savefig('results/preimage_iam/mutag_median_cs.001_nb' + str(nb_median) +
+# '.png', format="PNG")
 plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
 # compute the corresponding sod in graph space.
- sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
- ged_method=ged_method, saveGXL=saveGXL)
+ sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, params_ged=params_ged)
 sod_gs_list.append(sod_tmp)
 sod_gs_min_list.append(np.min(sod_tmp))
 print('\nsmallest sod in graph space: ', np.min(sod_tmp))
diff --git a/preimage/utils.py b/preimage/utils.py
index 70cb6f5..99c63c0 100644
--- a/preimage/utils.py
+++ b/preimage/utils.py
@@ -39,13 +39,13 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
 return np.sqrt(term1 - term2 + term3)
-def compute_kernel(Gn, graph_kernel, verbose):
+def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
 if graph_kernel == 'marginalizedkernel':
- Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
+ Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label,
 p_quit=0.03, n_iteration=10, remove_totters=False,
 n_jobs=multiprocessing.cpu_count(), verbose=verbose)
 elif graph_kernel == 'untilhpathkernel':
- Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
+ Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label,
 depth=10, k_func='MinMax', compute_method='trie',
 n_jobs=multiprocessing.cpu_count(), verbose=verbose)
 elif graph_kernel == 'spkernel':
@@ -77,10 +77,10 @@ def gram2distances(Kmatrix):
 return dmatrix
-def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
+def kernel_distance_matrix(Gn, node_label, edge_label, Kmatrix=None, gkernel=None):
 dis_mat = np.empty((len(Gn), len(Gn)))
 if Kmatrix == None:
- Kmatrix = compute_kernel(Gn, gkernel, True)
+ Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)
 for i in range(len(Gn)):
 for j in range(i, len(Gn)):
 dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
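With the widened signatures above, call sites now choose the label names instead of relying on the old hard-coded 'atom'/None. A hypothetical call for the MUTAG settings used in these tests (the wrapper name kernel_distances and the unpacked return names are illustrative; kernel_distance_matrix returns four values, as its callers unpack):

from utils import compute_kernel, kernel_distance_matrix

def kernel_distances(Gn):
    # Gram matrix over Gn with explicit node/edge label names.
    km = compute_kernel(Gn, 'marginalizedkernel', 'atom', 'bond_type', False)
    # d(i, j) = sqrt(k_ii + k_jj - 2 * k_ij); passing Kmatrix=km reuses the
    # Gram matrix instead of recomputing the kernel.
    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(
        Gn, 'atom', 'bond_type', Kmatrix=km, gkernel='marginalizedkernel')
    return dis_mat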
""" import networkx as nx from os.path import basename @@ -35,22 +38,15 @@ def loadCT(filename): for i in range(0, nb_nodes): tmp = content[i + 2].split(" ") tmp = [x for x in tmp if x != ''] - g.add_node(i, atom=tmp[3], label=tmp[3]) + g.add_node(i, atom=tmp[3].strip(), + label=[item.strip() for item in tmp[3:]], + attributes=[item.strip() for item in tmp[0:3]]) for i in range(0, nb_edges): tmp = content[i + g.number_of_nodes() + 2].split(" ") tmp = [x for x in tmp if x != ''] - g.add_edge( - int(tmp[0]) - 1, - int(tmp[1]) - 1, - bond_type=tmp[3].strip(), - label=tmp[3].strip()) - - -# for i in range(0, nb_edges): -# tmp = content[i + g.number_of_nodes() + 2] -# tmp = [tmp[i:i+3] for i in range(0, len(tmp), 3)] -# g.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, -# bond_type=tmp[3].strip(), label=tmp[3].strip()) + g.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, + bond_type=tmp[2].strip(), + label=[item.strip() for item in tmp[2:]]) return g @@ -71,6 +67,7 @@ def loadGXL(filename): labels[attr.attrib['name']] = attr[0].text if 'chem' in labels: labels['label'] = labels['chem'] + labels['atom'] = labels['chem'] g.add_node(index, **labels) index += 1 @@ -80,6 +77,7 @@ def loadGXL(filename): labels[attr.attrib['name']] = attr[0].text if 'valence' in labels: labels['label'] = labels['valence'] + labels['bond_type'] = labels['valence'] g.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], **labels) return g @@ -392,7 +390,7 @@ def loadDataset(filename, filename_y=None, extra_params=None): Notes ----- This function supports following graph dataset formats: - 'ds': load data from .ct file. See comments of function loadCT for a example. + 'ds': load data from .ds file. See comments of function loadFromDS for a example. 'cxl': load data from Graph eXchange Language file (.cxl file). See http://www.gupro.de/GXL/Introduction/background.html, 2019 for detail. 'sdf': load data from structured data file (.sdf file). See @@ -406,45 +404,24 @@ def loadDataset(filename, filename_y=None, extra_params=None): 2019 for details. Note here filename is the name of either .txt file in the dataset directory. """ - from os.path import dirname, splitext - - dirname_dataset = dirname(filename) extension = splitext(filename)[1][1:] - data = [] - y = [] if extension == "ds": - content = open(filename).read().splitlines() - if filename_y is None or filename_y == '': - for i in range(0, len(content)): - tmp = content[i].split(' ') - # remove the '#'s in file names - data.append( - loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) - y.append(float(tmp[1])) - else: # y in a seperate file - for i in range(0, len(content)): - tmp = content[i] - # remove the '#'s in file names - data.append( - loadCT(dirname_dataset + '/' + tmp.replace('#', '', 1))) - content_y = open(filename_y).read().splitlines() - # assume entries in filename and filename_y have the same order. 
@@ -392,7 +390,7 @@ def loadDataset(filename, filename_y=None, extra_params=None):
 Notes
 -----
 This function supports the following graph dataset formats:
- 'ds': load data from .ct file. See comments of function loadCT for a example.
+ 'ds': load data from .ds file. See comments of function loadFromDS for an example.
 'cxl': load data from Graph eXchange Language file (.cxl file). See
 http://www.gupro.de/GXL/Introduction/background.html, 2019 for detail.
 'sdf': load data from structured data file (.sdf file). See
@@ -406,45 +404,24 @@ def loadDataset(filename, filename_y=None, extra_params=None):
 2019 for details. Note here filename is the name of any .txt file in
 the dataset directory.
 """
- from os.path import dirname, splitext
-
- dirname_dataset = dirname(filename)
 extension = splitext(filename)[1][1:]
- data = []
- y = []
 if extension == "ds":
- content = open(filename).read().splitlines()
- if filename_y is None or filename_y == '':
- for i in range(0, len(content)):
- tmp = content[i].split(' ')
- # remove the '#'s in file names
- data.append(
- loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1)))
- y.append(float(tmp[1]))
- else: # y in a seperate file
- for i in range(0, len(content)):
- tmp = content[i]
- # remove the '#'s in file names
- data.append(
- loadCT(dirname_dataset + '/' + tmp.replace('#', '', 1)))
- content_y = open(filename_y).read().splitlines()
- # assume entries in filename and filename_y have the same order.
- for item in content_y:
- tmp = item.split(' ')
- # assume the 3rd entry in a line is y (for Alkane dataset)
- y.append(float(tmp[2]))
+ data, y = loadFromDS(filename, filename_y)
 elif extension == "cxl":
 import xml.etree.ElementTree as ET
-
+
+ dirname_dataset = dirname(filename)
 tree = ET.parse(filename)
 root = tree.getroot()
 data = []
 y = []
- for graph in root.iter('print'):
+ for graph in root.iter('graph'):
 mol_filename = graph.attrib['file']
 mol_class = graph.attrib['class']
 data.append(loadGXL(dirname_dataset + '/' + mol_filename))
 y.append(mol_class)
+ elif extension == 'xml':
+ data, y = loadFromXML(filename, extra_params)
 elif extension == "sdf":
 import numpy as np
 from tqdm import tqdm
@@ -471,6 +448,7 @@ def loadDataset(filename, filename_y=None, extra_params=None):
 elif extension == "mat":
 data, y = loadMAT(filename, extra_params)
 elif extension == 'txt':
+ dirname_dataset = dirname(filename)
 data, y = loadTXT(dirname_dataset)
# print(len(y))
# print(y)
@@ -485,6 +463,75 @@ def loadDataset(filename, filename_y=None, extra_params=None):
 return data, y
+
+
+def loadFromXML(filename, extra_params):
+ import xml.etree.ElementTree as ET
+
+ dirname_dataset = dirname(filename)
+ tree = ET.parse(filename)
+ root = tree.getroot()
+ data = []
+ y = []
+ for graph in root.iter('print'):
+ mol_filename = graph.attrib['file']
+ mol_class = graph.attrib['class']
+ data.append(loadGXL(dirname_dataset + '/' + mol_filename))
+ y.append(mol_class)
+
+ return data, y
+
+
+def loadFromDS(filename, filename_y):
+ """Load data from .ds file.
+ Possible graph formats include:
+ '.ct': see function loadCT for details.
+ '.gxl': see function loadGXL for details.
+ Note the graph format is detected automatically from the extensions of
+ the graph files listed in the .ds file.
+ """
+ dirname_dataset = dirname(filename)
+ data = []
+ y = []
+ content = open(filename).read().splitlines()
+ extension = splitext(content[0].split(' ')[0])[1][1:]
+ if filename_y is None or filename_y == '':
+ if extension == 'ct':
+ for i in range(0, len(content)):
+ tmp = content[i].split(' ')
+ # remove the '#'s in file names
+ data.append(
+ loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1)))
+ y.append(float(tmp[1]))
+ elif extension == 'gxl':
+ for i in range(0, len(content)):
+ tmp = content[i].split(' ')
+ # remove the '#'s in file names
+ data.append(
+ loadGXL(dirname_dataset + '/' + tmp[0].replace('#', '', 1)))
+ y.append(float(tmp[1]))
+ else: # y is in a separate file
+ if extension == 'ct':
+ for i in range(0, len(content)):
+ tmp = content[i]
+ # remove the '#'s in file names
+ data.append(
+ loadCT(dirname_dataset + '/' + tmp.replace('#', '', 1)))
+ elif extension == 'gxl':
+ for i in range(0, len(content)):
+ tmp = content[i]
+ # remove the '#'s in file names
+ data.append(
+ loadGXL(dirname_dataset + '/' + tmp.replace('#', '', 1)))
+
+ content_y = open(filename_y).read().splitlines()
+ # assume entries in filename and filename_y have the same order.
+ for item in content_y:
+ tmp = item.split(' ')
+ # assume the 3rd entry in a line is y (for the Alkane dataset)
+ y.append(float(tmp[2]))
+
+ return data, y
+
+
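loadFromDS dispatches on the extension of the first graph file named in the .ds file; each line pairs a (possibly '#'-prefixed) graph file with an optional target value. A hypothetical layout and call (the numbers are invented; the Alkane paths mirror the demo in the __main__ block below):

# dataset.ds -- one graph per line, target after the file name:
#
#   #mol1.ct 170.5
#   #mol2.ct 189.2
#
# When targets live in a separate file, .ds lines name only the graph files,
# and the third field of each line of the y-file is read as the target:
Gn, y = loadFromDS('../../datasets/Alkane/dataset.ds',
                   '../../datasets/Alkane/dataset_boiling_point_names.txt')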
""" @@ -509,7 +556,30 @@ def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None if __name__ == '__main__': - ds = {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', - 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb - Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params']) - saveDataset(Gn, y, group='xml', filename='temp/temp') \ No newline at end of file +# ### Load dataset from .ds file. +# # .ct files. + ds = {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', + 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt'} + Gn, y = loadDataset(ds['dataset'], filename_y=ds['dataset_y']) +# ds = {'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds'} # node symb +# Gn, y = loadDataset(ds['dataset']) +# ds = {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds'} # node/edge symb +# Gn, y = loadDataset(ds['dataset']) +# ds = {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds'} # unlabeled +# Gn, y = loadDataset(ds['dataset']) + print(Gn[1].nodes(data=True)) + print(Gn[1].edges(data=True)) + print(y[1]) + +# # .gxl file. +# ds = {'name': 'monoterpenoides', +# 'dataset': '../../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb +# Gn, y = loadDataset(ds['dataset']) +# print(Gn[1].nodes(data=True)) +# print(Gn[1].edges(data=True)) +# print(y[1]) + +# ds = {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', +# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb +# Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +# saveDataset(Gn, y, group='xml', filename='temp/temp') \ No newline at end of file