diff --git a/preimage/fitDistance.py b/preimage/fitDistance.py
index d0d2357..42b9889 100644
--- a/preimage/fitDistance.py
+++ b/preimage/fitDistance.py
@@ -7,6 +7,12 @@ Created on Wed Oct 16 14:20:06 2019
 """
 import numpy as np
 from tqdm import tqdm
+from itertools import combinations_with_replacement
+import multiprocessing
+from multiprocessing import Pool
+from functools import partial
+import time
+import random
 
 from scipy import optimize
 import cvxpy as cp
@@ -19,7 +25,12 @@ from utils import kernel_distance_matrix
 
 def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
     # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
-    edit_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
+    random.seed(1)
+    cost_rdm = random.sample(range(1, 10), 5)
+    edit_costs = cost_rdm + [0]
+#    edit_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
+#    edit_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
+#    edit_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0]
     idx_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
     
     # compute distances in feature space.
@@ -34,24 +45,13 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
     edit_cost_list = []
     
     for itr in range(itr_max):
-        print('iteration', itr)
-        ged_all = []
-        n_edit_operations = [[] for i in range(len(idx_nonzeros))]
+        print('\niteration', itr)
         # compute GEDs and numbers of edit operations.
         edit_cost_constant = [i for i in edit_costs]
         edit_cost_list.append(edit_cost_constant)
-        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
-#        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], lib='gedlibpy', 
-                    cost='CONSTANT', method='IPFP', 
-                    edit_cost_constant=edit_cost_constant, stabilizer='min', 
-                    repeat=30)
-                ged_all.append(dis)
-                n_eo_tmp = get_nb_edit_operations(Gn[i], 
-                    Gn[j], pi_forward, pi_backward)
-                for idx, item in enumerate(idx_nonzeros):
-                    n_edit_operations[idx].append(n_eo_tmp[item])
+        
+        ged_all, ged_mat, n_edit_operations = compute_geds(Gn, edit_cost_constant, 
+            idx_nonzeros, parallel=True)
                 
         residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
         residual_list.append(residual)
@@ -59,23 +59,105 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
         # "fit" geds to distances in feature space by tuning edit costs using the
         # Least Squares Method.
         nb_cost_mat = np.array(n_edit_operations).T
-        edit_costs_new, residual = get_better_costs(nb_cost_mat, dis_k_vec)
+        edit_costs_new, residual = compute_better_costs(nb_cost_mat, dis_k_vec)
 
-        print(residual)
+        print('pseudo residual:', residual)
         for i in range(len(edit_costs_new)):
             if edit_costs_new[i] < 0:
-                if edit_costs_new[i] > -1e-6:
+                if edit_costs_new[i] > -1e-9:
                     edit_costs_new[i] = 0
                 else:
                     raise ValueError('The edit cost is negative.')
         
         for idx, item in enumerate(idx_nonzeros):
             edit_costs[item] = edit_costs_new[idx]
+            
+        print('edit_costs:', edit_costs)
+        print('residual_list:', residual_list)
     
-    return edit_costs, residual_list, edit_cost_list
+    return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat
+
+
+def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False):
+    ged_mat = np.zeros((len(Gn), len(Gn)))
+    if parallel:
+#        print('parallel')
+        len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
+        ged_all = [0 for i in range(len_itr)]
+        n_edit_operations = [[0 for i in range(len_itr)] for j in 
+                              range(len(idx_nonzeros))]
+               
+        itr = combinations_with_replacement(range(0, len(Gn)), 2)
+        n_jobs = multiprocessing.cpu_count()
+        if len_itr < 100 * n_jobs:
+            chunksize = int(len_itr / n_jobs) + 1
+        else:
+            chunksize = 100
+        def init_worker(gn_toshare):
+            global G_gn
+            G_gn = gn_toshare
+        do_partial = partial(_wrapper_compute_ged_parallel, edit_cost_constant, 
+                             idx_nonzeros)
+        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,))
+        iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
+                        desc='computing GEDs', file=sys.stdout)
+#        iterator = pool.imap_unordered(do_partial, itr, chunksize)
+        for i, j, dis, n_eo_tmp in iterator:
+            idx_itr = int(len(Gn) * i + j - i * (i + 1) / 2)
+            ged_all[idx_itr] = dis
+            ged_mat[i][j] = dis
+            ged_mat[j][i] = dis
+            for idx, item in enumerate(idx_nonzeros):
+                n_edit_operations[idx][idx_itr] = n_eo_tmp[item]
+#            print('\n-------------------------------------------')
+#            print(i, j, idx_itr, dis)
+        pool.close()
+        pool.join()
+        
+    else:
+        ged_all = []
+        n_edit_operations = [[] for i in range(len(idx_nonzeros))]
+        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
+#        for i in range(len(Gn)):
+            for j in range(i, len(Gn)):
+#                time0 = time.time()
+                dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], lib='gedlibpy', 
+                    cost='CONSTANT', method='IPFP', 
+                    edit_cost_constant=edit_cost_constant, stabilizer='min', 
+                    repeat=50)
+#                time1 = time.time() - time0
+#                time0 = time.time()
+                ged_all.append(dis)
+                ged_mat[i][j] = dis
+                ged_mat[j][i] = dis
+                n_eo_tmp = get_nb_edit_operations(Gn[i], Gn[j], pi_forward, pi_backward)
+                for idx, item in enumerate(idx_nonzeros):
+                    n_edit_operations[idx].append(n_eo_tmp[item])
+#                time2 = time.time() - time0
+#                print(time1, time2, time1 / time2)
+                    
+    return ged_all, ged_mat, n_edit_operations
+                    
+
+def _wrapper_compute_ged_parallel(edit_cost_constant, idx_nonzeros, itr):
+    i = itr[0]
+    j = itr[1]
+    dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], edit_cost_constant, 
+                                          idx_nonzeros)
+    return i, j, dis, n_eo_tmp
+
+
+def _compute_ged_parallel(g1, g2, edit_cost_constant, idx_nonzeros):
+    dis, pi_forward, pi_backward = GED(g1, g2, lib='gedlibpy', 
+        cost='CONSTANT', method='IPFP', 
+        edit_cost_constant=edit_cost_constant, stabilizer='min', 
+        repeat=50)
+    n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward)
+        
+    return dis, n_eo_tmp
 
 
-def get_better_costs(nb_cost_mat, dis_k_vec):
+def compute_better_costs(nb_cost_mat, dis_k_vec):
 #    # method 1: simple least square method.
 #    edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
 #                                                     rcond=None)
@@ -92,13 +174,11 @@ def get_better_costs(nb_cost_mat, dis_k_vec):
     b = 1
     x = cp.Variable(nb_cost_mat.shape[1])
     prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
-                      [G@x <= h,
-                       A@x == b])
+                      [G@x <= h])
     prob.solve()
     edit_costs_new = x.value
     residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
     
-#    p = program(minimize(norm2(nb_cost_mat*x-dis_k_vec)),[equals(sum(x),1),geq(x,0)])
     return edit_costs_new, residual
 
 
@@ -111,5 +191,8 @@ if __name__ == '__main__':
     remove_edges(Gn)
     gkernel = 'marginalizedkernel'
     itr_max = 10
-    edit_costs, residual_list, edit_cost_list = \
-        fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
\ No newline at end of file
+    time0 = time.time()
+    edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat = \
+        fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
+    total_time = time.time() - time0
+    print('total time:', total_time)
\ No newline at end of file
diff --git a/pygraph/utils/parallel.py b/pygraph/utils/parallel.py
index fdf2244..603164a 100644
--- a/pygraph/utils/parallel.py
+++ b/pygraph/utils/parallel.py
@@ -20,11 +20,10 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
 #            def init_worker(v_share):
 #                global G_var
 #                G_var = v_share
-            
+            if n_jobs == None:
+                n_jobs = multiprocessing.cpu_count()
             with Pool(processes=n_jobs, initializer=init_worker, 
-                      initargs=glbv) as pool:
-                if n_jobs == None:
-                    n_jobs = multiprocessing.cpu_count()
+                      initargs=glbv) as pool:                
                 if chunksize == None:
                     if len_itr < 100 * n_jobs:
                         chunksize = int(len_itr / n_jobs) + 1
@@ -35,9 +34,9 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
                     pool.imap_unordered(func, itr, chunksize)):
                     func_assign(result, var_to_assign)
         else:
+            if n_jobs == None:
+                n_jobs = multiprocessing.cpu_count()
             with Pool(processes=n_jobs) as pool:
-                if n_jobs == None:
-                    n_jobs = multiprocessing.cpu_count()
                 if chunksize == None:
                     if len_itr < 100 * n_jobs:
                         chunksize = int(len_itr / n_jobs) + 1