
1. Apply the multiprocessing.Pool.imap_unordered method instead of the map method, so that tqdm can be used to track progress (a minimal sketch of this pattern follows below).

2. Apply part of the Fast Computation of Shortest Path Kernel (FCSP) method to speed up the shortest-path (sp) kernel (a second sketch below illustrates the idea).
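Below is a minimal, self-contained sketch of the pattern from change 1 (an illustration, not the repository code): pool.imap_unordered is wrapped in tqdm so the bar advances as each chunk finishes, and the chunksize is picked with the same divmod heuristic this commit uses. The worker square and the item count are placeholders.

import sys
from multiprocessing import Pool

from tqdm import tqdm


def square(i):
    # stand-in for a real worker such as wrap_getSPGraph or spkernel_do;
    # returning the index with the result lets us reorder unordered output
    return i, i * i


if __name__ == '__main__':
    n_jobs = 4
    items = range(1000)
    len_itr = len(items)
    # same heuristic as in the commit: emulate pool.map's default chunking
    # (about 4 chunks per worker) for small inputs, cap the chunk at 100 otherwise
    if len_itr < 100:
        chunksize, extra = divmod(len_itr, n_jobs * 4)
        if extra:
            chunksize += 1
    else:
        chunksize = 100
    results = [0] * len_itr
    with Pool(n_jobs) as pool:
        # imap_unordered yields results as soon as workers finish them, so tqdm
        # can show real progress; pool.map would block until all work is done
        for i, sq in tqdm(
                pool.imap_unordered(square, items, chunksize),
                total=len_itr,
                desc='squaring',
                file=sys.stdout):
            results[i] = sq

Returning the index with each result is what makes the unordered variant safe here: results arrive in completion order, and the index restores them to position.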
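Change 2 replaces per-edge node-kernel evaluations with a precomputed lookup table, the part borrowed from FCSP: the node kernel between every vertex of g1 and every vertex of g2 is computed once into vk_dict, and the loop over edge pairs then only performs dictionary lookups and multiplications. Here is a minimal sketch of that idea with a toy Dirac node kernel and toy graphs (hypothetical names, not the library's spkernel_do):

from itertools import product

import networkx as nx


def node_kernel(l1, l2):
    # hypothetical symbolic node kernel: a Dirac/delta kernel on labels
    return 1.0 if l1 == l2 else 0.0


def sp_kernel_pair(g1, g2, node_label='atom'):
    # FCSP-style step 1: precompute all vertex-kernel values once
    vk_dict = {}
    for n1, n2 in product(g1.nodes(data=True), g2.nodes(data=True)):
        vk_dict[(n1[0], n2[0])] = node_kernel(n1[1][node_label],
                                              n2[1][node_label])
    # step 2: the edge-pair loop is now pure lookups; each undirected edge
    # walk is counted twice, once from each of its end nodes
    kernel = 0.0
    for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
        if e1[2]['cost'] == e2[2]['cost']:
            nk11 = vk_dict[(e1[0], e2[0])]
            nk12 = vk_dict[(e1[0], e2[1])]
            nk21 = vk_dict[(e1[1], e2[0])]
            nk22 = vk_dict[(e1[1], e2[1])]
            kernel += nk11 * nk22 + nk12 * nk21
    return kernel


g1 = nx.Graph()
g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
g1.add_edge(0, 1, cost=1.0)
g2 = nx.Graph()
g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
g2.add_edge(0, 1, cost=1.0)
print(sp_kernel_pair(g1, g2))  # 1.0: only the aligned orientation matches

This trades kernel calls on every edge pair for calls on every vertex pair plus cheap lookups, which pays off because shortest-path graphs are dense in edges relative to their vertex count.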
v0.1
jajupmochi, 7 years ago
commit 06b32cdf8a
3 changed files with 514 additions and 1030 deletions
  1. notebooks/run_spkernel.ipynb (+149 -727)
  2. pygraph/kernels/spKernel.py (+335 -293)
  3. pygraph/utils/model_selection_precomputed.py (+30 -10)

notebooks/run_spkernel.ipynb (+149 -727)
File diff suppressed because it is too large.


pygraph/kernels/spKernel.py (+335 -293)

@@ -8,7 +8,7 @@ import pathlib
sys.path.insert(0, "../")
from tqdm import tqdm
import time
from itertools import combinations_with_replacement, product
from itertools import combinations, combinations_with_replacement, product
from functools import partial
from joblib import Parallel, delayed
from multiprocessing import Pool
@@ -77,207 +77,108 @@ def spkernel(*args,
if len(Gn) != len_gn:
print('\n %d graphs are removed as they don\'t contain edges.\n' %
(len_gn - len(Gn)))

start_time = time.time()
pool = Pool(n_jobs)
# get shortest path graphs of Gn
getsp_partial = partial(wrap_getSPGraph, Gn, edge_weight)
result_sp = pool.map(getsp_partial, range(0, len(Gn)))
for i in result_sp:
Gn[i[0]] = i[1]
if len(Gn) < 100:
# use a pool.map-style default chunksize when the iterable has fewer than 100 items
chunksize, extra = divmod(len(Gn), n_jobs * 4)
if extra:
chunksize += 1
else:
chunksize = 100
# chunksize = 300 # int(len(list(itr)) / n_jobs)
for i, g in tqdm(
pool.imap_unordered(getsp_partial, range(0, len(Gn)), chunksize),
desc='getting sp graphs',
file=sys.stdout):
Gn[i] = g

# Gn = [
# getSPGraph(G, edge_weight=edge_weight)
# for G in tqdm(Gn, desc='getting sp graphs', file=sys.stdout)
# ]
# # ---- use pool.map to parallelize ----
# result_sp = pool.map(getsp_partial, range(0, len(Gn)))
# for i in result_sp:
# Gn[i[0]] = i[1]
# or
# getsp_partial = partial(wrap_getSPGraph, Gn, edge_weight)
# for i, g in tqdm(
# pool.map(getsp_partial, range(0, len(Gn))),
# desc='getting sp graphs',
# file=sys.stdout):
# Gn[i] = g

# # ---- only for the Fast Computation of Shortest Path Kernel (FCSP)
# sp_ml = [0] * len(Gn) # shortest path matrices
# for i in result_sp:
# sp_ml[i[0]] = i[1]
# edge_x_g = [[] for i in range(len(sp_ml))]
# edge_y_g = [[] for i in range(len(sp_ml))]
# edge_w_g = [[] for i in range(len(sp_ml))]
# for idx, item in enumerate(sp_ml):
# for i1 in range(len(item)):
# for i2 in range(i1 + 1, len(item)):
# if item[i1, i2] != np.inf:
# edge_x_g[idx].append(i1)
# edge_y_g[idx].append(i2)
# edge_w_g[idx].append(item[i1, i2])
# print(len(edge_x_g[0]))
# print(len(edge_y_g[0]))
# print(len(edge_w_g[0]))

Kmatrix = np.zeros((len(Gn), len(Gn)))

# ---- use pool.imap_unordered to parallelize and track progress. ----
do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels)
itr = combinations_with_replacement(range(0, len(Gn)), 2)
# chunksize = 2000 # int(len(list(itr)) / n_jobs)
# for i, j, kernel in tqdm(pool.imap_unordered(do_partial, itr, chunksize)):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel

result_perf = pool.map(do_partial, itr)
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
if len_itr < 100:
chunksize, extra = divmod(len_itr, n_jobs * 4)
if extra:
chunksize += 1
else:
chunksize = 100
for i, j, kernel in tqdm(
pool.imap_unordered(do_partial, itr, chunksize),
desc='calculating kernels',
file=sys.stdout):
Kmatrix[i][j] = kernel
Kmatrix[j][i] = kernel
pool.close()
pool.join()

# # ---- use pool.map to parallelize. ----
# # result_perf = pool.map(do_partial, itr)
# do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels)
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for i, j, kernel in tqdm(
# pool.map(do_partial, itr), desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()

# # ---- use joblib.Parallel to parallelize and track progress. ----
# result_perf = Parallel(
# n_jobs=n_jobs, verbose=10)(
# delayed(do_partial)(ij)
# for ij in combinations_with_replacement(range(0, len(Gn)), 2))

# result_perf = [
# do_partial(ij)
# for ij in combinations_with_replacement(range(0, len(Gn)), 2)
# ]
# for i in result_perf:
# Kmatrix[i[0]][i[1]] = i[2]
# Kmatrix[i[1]][i[0]] = i[2]

for i in result_perf:
Kmatrix[i[0]][i[1]] = i[2]
Kmatrix[i[1]][i[0]] = i[2]

# pbar = tqdm(
# total=((len(Gn) + 1) * len(Gn) / 2),
# desc='calculating kernels',
# file=sys.stdout)
# if ds_attrs['node_labeled']:
# # node symb and non-symb labeled
# if ds_attrs['node_attr_dim'] > 0:
# if ds_attrs['is_directed']:
# for i, j in combinations_with_replacement(
# range(0, len(Gn)), 2):
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['mix']
# try:
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn(n11[node_label], n21[node_label], [
# n11['attributes']
# ], [n21['attributes']]) * kn(
# n12[node_label], n22[node_label],
# [n12['attributes']], [n22['attributes']])
# Kmatrix[i][j] += kn1
# except KeyError: # missing labels or attributes
# pass
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)

# else:
# for i, j in combinations_with_replacement(
# range(0, len(Gn)), 2):
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['mix']
# try:
# # each edge walk is counted twice, starting from both its extreme nodes.
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn(n11[node_label], n21[node_label], [
# n11['attributes']
# ], [n21['attributes']]) * kn(
# n12[node_label], n22[node_label],
# [n12['attributes']], [n22['attributes']])
# kn2 = kn(n11[node_label], n22[node_label], [
# n11['attributes']
# ], [n22['attributes']]) * kn(
# n12[node_label], n21[node_label],
# [n12['attributes']], [n21['attributes']])
# Kmatrix[i][j] += kn1 + kn2
# except KeyError: # missing labels or attributes
# pass
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# # node symb labeled
# else:
# if ds_attrs['is_directed']:
# for i, j in combinations_with_replacement(
# range(0, len(Gn)), 2):
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['symb']
# try:
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn(n11[node_label],
# n21[node_label]) * kn(
# n12[node_label], n22[node_label])
# Kmatrix[i][j] += kn1
# except KeyError: # missing labels
# pass
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)

# else:
# for i, j in combinations_with_replacement(
# range(0, len(Gn)), 2):
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['symb']
# try:
# # each edge walk is counted twice, starting from both its extreme nodes.
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn(n11[node_label],
# n21[node_label]) * kn(
# n12[node_label], n22[node_label])
# kn2 = kn(n11[node_label],
# n22[node_label]) * kn(
# n12[node_label], n21[node_label])
# Kmatrix[i][j] += kn1 + kn2
# except KeyError: # missing labels
# pass
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# else:
# # node non-symb labeled
# if ds_attrs['node_attr_dim'] > 0:
# if ds_attrs['is_directed']:
# for i, j in combinations_with_replacement(
# range(0, len(Gn)), 2):
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['nsymb']
# try:
# # each edge walk is counted twice, starting from both its extreme nodes.
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn([n11['attributes']],
# [n21['attributes']]) * kn(
# [n12['attributes']],
# [n22['attributes']])
# Kmatrix[i][j] += kn1
# except KeyError: # missing attributes
# pass
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# else:
# for i, j in combinations_with_replacement(
# range(0, len(Gn)), 2):
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['nsymb']
# try:
# # each edge walk is counted twice, starting from both its extreme nodes.
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn([n11['attributes']],
# [n21['attributes']]) * kn(
# [n12['attributes']],
# [n22['attributes']])
# kn2 = kn([n11['attributes']],
# [n22['attributes']]) * kn(
# [n12['attributes']],
# [n21['attributes']])
# Kmatrix[i][j] += kn1 + kn2
# except KeyError: # missing attributes
# pass
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)

# # node unlabeled
# else:
# for i, j in combinations_with_replacement(range(0, len(Gn)), 2):
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# Kmatrix[i][j] += 1
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# # ---- direct running, normally uses a single CPU core. ----
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# i, j, kernel = spkernel_do(Gn, ds_attrs, node_label, node_kernels, gs)
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel

run_time = time.time() - start_time
print(
@@ -291,130 +192,271 @@ def spkernel_do(Gn, ds_attrs, node_label, node_kernels, ij):

i = ij[0]
j = ij[1]
g1 = Gn[i]
g2 = Gn[j]
Kmatrix = 0
if ds_attrs['node_labeled']:
# node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
if ds_attrs['is_directed']:
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['mix']
try:
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(
n11[node_label], n21[node_label],
[n11['attributes']], [n21['attributes']]) * kn(
n12[node_label], n22[node_label],
[n12['attributes']], [n22['attributes']])
Kmatrix += kn1
except KeyError: # missing labels or attributes
pass

try:
# compute vertex kernel matrices first, a method borrowed from FCSP.
if ds_attrs['node_labeled']:
# node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
vk_dict = {} # vertex kernel values dict
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
[n1[1]['attributes']], [n2[1]['attributes']])
# node symb labeled
else:
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['mix']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(
n11[node_label], n21[node_label],
[n11['attributes']], [n21['attributes']]) * kn(
n12[node_label], n22[node_label],
[n12['attributes']], [n22['attributes']])
kn2 = kn(
n11[node_label], n22[node_label],
[n11['attributes']], [n22['attributes']]) * kn(
n12[node_label], n21[node_label],
[n12['attributes']], [n21['attributes']])
Kmatrix += kn1 + kn2
except KeyError: # missing labels or attributes
pass
# node symb labeled
kn = node_kernels['symb']
vk_dict = {} # vertex kernel values dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
if ds_attrs['is_directed']:
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['symb']
try:
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(n11[node_label], n21[node_label]) * kn(
n12[node_label], n22[node_label])
Kmatrix += kn1
except KeyError: # missing labels
pass
else:
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['symb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(n11[node_label], n21[node_label]) * kn(
n12[node_label], n22[node_label])
kn2 = kn(n11[node_label], n22[node_label]) * kn(
n12[node_label], n21[node_label])
Kmatrix += kn1 + kn2
except KeyError: # missing labels
pass
else:
# node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
if ds_attrs['is_directed']:
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['nsymb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(
[n11['attributes']], [n21['attributes']]) * kn(
[n12['attributes']], [n22['attributes']])
Kmatrix += kn1
except KeyError: # missing attributes
pass
# node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
vk_dict = {} # vertex kernel values dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']],
[n2[1]['attributes']])
# node unlabeled
else:
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['nsymb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(
[n11['attributes']], [n21['attributes']]) * kn(
[n12['attributes']], [n22['attributes']])
kn2 = kn(
[n11['attributes']], [n22['attributes']]) * kn(
[n12['attributes']], [n21['attributes']])
Kmatrix += kn1 + kn2
except KeyError: # missing attributes
pass
# node unlabeled
Kmatrix += 1
return i, j, Kmatrix

# compute graph kernels
if ds_attrs['is_directed']:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
# in a directed graph each edge walk is counted once, following edge direction.
nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
kn1 = nk11 * nk22
Kmatrix += kn1
else:
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
Kmatrix += 1
# each edge walk is counted twice, starting from both its extreme nodes.
nk11 = vk_dict[(e1[0], e2[0])]
nk12 = vk_dict[(e1[0], e2[1])]
nk21 = vk_dict[(e1[1], e2[0])]
nk22 = vk_dict[(e1[1], e2[1])]
kn1 = nk11 * nk22
kn2 = nk12 * nk21
Kmatrix += kn1 + kn2

# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
# # compute vertex kernel matrix
# try:
# vk_mat = np.zeros((nx.number_of_nodes(g1),
# nx.number_of_nodes(g2)))
# g1nl = enumerate(g1.nodes(data=True))
# g2nl = enumerate(g2.nodes(data=True))
# for i1, n1 in g1nl:
# for i2, n2 in g2nl:
# vk_mat[i1][i2] = kn(
# n1[1][node_label], n2[1][node_label],
# [n1[1]['attributes']], [n2[1]['attributes']])

# range1 = range(0, len(edge_w_g[i]))
# range2 = range(0, len(edge_w_g[j]))
# for i1 in range1:
# x1 = edge_x_g[i][i1]
# y1 = edge_y_g[i][i1]
# w1 = edge_w_g[i][i1]
# for i2 in range2:
# x2 = edge_x_g[j][i2]
# y2 = edge_y_g[j][i2]
# w2 = edge_w_g[j][i2]
# ke = (w1 == w2)
# if ke > 0:
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
# Kmatrix += kn1 + kn2
except KeyError: # missing labels or attributes
pass

return i, j, Kmatrix


def wrap_getSPGraph(Gn, weight, i):
return i, getSPGraph(Gn[i], edge_weight=weight)
# return i, nx.floyd_warshall_numpy(Gn[i], weight=weight)


# def spkernel_do(Gn, ds_attrs, node_label, node_kernels, ij):

# i = ij[0]
# j = ij[1]
# g1 = Gn[i]
# g2 = Gn[j]
# Kmatrix = 0
# if ds_attrs['node_labeled']:
# # node symb and non-symb labeled
# if ds_attrs['node_attr_dim'] > 0:
# if ds_attrs['is_directed']:
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['mix']
# try:
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn(
# n11[node_label], n21[node_label],
# [n11['attributes']], [n21['attributes']]) * kn(
# n12[node_label], n22[node_label],
# [n12['attributes']], [n22['attributes']])
# Kmatrix += kn1
# except KeyError: # missing labels or attributes
# pass
# else:
# kn = node_kernels['mix']
# try:
# # compute shortest path matrices first, method borrowed from FCSP.
# vk_dict = {} # shortest path matrices dict
# for n1 in g1.nodes(data=True):
# for n2 in g2.nodes(data=True):
# vk_dict[(n1[0], n2[0])] = kn(
# n1[1][node_label], n2[1][node_label],
# [n1[1]['attributes']], [n2[1]['attributes']])

# for e1, e2 in product(
# g1.edges(data=True), g2.edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# # each edge walk is counted twice, starting from both its extreme nodes.
# nk11, nk12, nk21, nk22 = vk_dict[(
# e1[0],
# e2[0])], vk_dict[(e1[0], e2[1])], vk_dict[(
# e1[1], e2[0])], vk_dict[(e1[1], e2[1])]
# kn1 = nk11 * nk22
# kn2 = nk12 * nk21
# Kmatrix += kn1 + kn2

# # # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
# # # compute vertex kernel matrix
# # try:
# # vk_mat = np.zeros((nx.number_of_nodes(g1),
# # nx.number_of_nodes(g2)))
# # g1nl = enumerate(g1.nodes(data=True))
# # g2nl = enumerate(g2.nodes(data=True))
# # for i1, n1 in g1nl:
# # for i2, n2 in g2nl:
# # vk_mat[i1][i2] = kn(
# # n1[1][node_label], n2[1][node_label],
# # [n1[1]['attributes']], [n2[1]['attributes']])

# # range1 = range(0, len(edge_w_g[i]))
# # range2 = range(0, len(edge_w_g[j]))
# # for i1 in range1:
# # x1 = edge_x_g[i][i1]
# # y1 = edge_y_g[i][i1]
# # w1 = edge_w_g[i][i1]
# # for i2 in range2:
# # x2 = edge_x_g[j][i2]
# # y2 = edge_y_g[j][i2]
# # w2 = edge_w_g[j][i2]
# # ke = (w1 == w2)
# # if ke > 0:
# # kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
# # kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
# # Kmatrix += kn1 + kn2

# except KeyError: # missing labels or attributes
# pass

# # node symb labeled
# else:
# if ds_attrs['is_directed']:
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['symb']
# try:
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn(n11[node_label], n21[node_label]) * kn(
# n12[node_label], n22[node_label])
# Kmatrix += kn1
# except KeyError: # missing labels
# pass
# else:
# kn = node_kernels['symb']
# try:
# # compute shortest path matrices first, method borrowed from FCSP.
# vk_dict = {} # shortest path matrices dict
# for n1 in g1.nodes(data=True):
# for n2 in g2.nodes(data=True):
# vk_dict[(n1[0], n2[0])] = kn(
# n1[1][node_label], n2[1][node_label])

# for e1, e2 in product(
# g1.edges(data=True), g2.edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# # each edge walk is counted twice, starting from both its extreme nodes.
# nk11, nk12, nk21, nk22 = vk_dict[(
# e1[0],
# e2[0])], vk_dict[(e1[0], e2[1])], vk_dict[(
# e1[1], e2[0])], vk_dict[(e1[1], e2[1])]
# kn1 = nk11 * nk22
# kn2 = nk12 * nk21
# Kmatrix += kn1 + kn2
# except KeyError: # missing labels
# pass
# else:
# # node non-symb labeled
# if ds_attrs['node_attr_dim'] > 0:
# if ds_attrs['is_directed']:
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['nsymb']
# try:
# # each edge walk is counted twice, starting from both its extreme nodes.
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn(
# [n11['attributes']], [n21['attributes']]) * kn(
# [n12['attributes']], [n22['attributes']])
# Kmatrix += kn1
# except KeyError: # missing attributes
# pass
# else:
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# kn = node_kernels['nsymb']
# try:
# # each edge walk is counted twice, starting from both its extreme nodes.
# n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
# i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
# j].nodes[e2[1]]
# kn1 = kn(
# [n11['attributes']], [n21['attributes']]) * kn(
# [n12['attributes']], [n22['attributes']])
# kn2 = kn(
# [n11['attributes']], [n22['attributes']]) * kn(
# [n12['attributes']], [n21['attributes']])
# Kmatrix += kn1 + kn2
# except KeyError: # missing attributes
# pass
# # node unlabeled
# else:
# for e1, e2 in product(
# Gn[i].edges(data=True), Gn[j].edges(data=True)):
# if e1[2]['cost'] == e2[2]['cost']:
# Kmatrix += 1

# return i, j, Kmatrix

pygraph/utils/model_selection_precomputed.py (+30 -10)

@@ -190,24 +190,44 @@ def model_selection_for_precomputed_kernel(datafile,
)
pool = Pool(n_jobs)
trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)
result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))
train_pref = [item[0] for item in result_perf]
val_pref = [item[1] for item in result_perf]
test_pref = [item[2] for item in result_perf]
train_pref = []
val_pref = []
test_pref = []
if NUM_TRIALS < 100:
chunksize, extra = divmod(NUM_TRIALS, n_jobs * 4)
if extra:
chunksize += 1
else:
chunksize = 100
for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):
train_pref.append(o1)
val_pref.append(o2)
test_pref.append(o3)
pool.close()
pool.join()

# # ---- use pool.map to parallelize. ----
# result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))
# train_pref = [item[0] for item in result_perf]
# val_pref = [item[1] for item in result_perf]
# test_pref = [item[2] for item in result_perf]

# # ---- use joblib.Parallel to parallelize and track progress. ----
# trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)
# result_perf = Parallel(n_jobs=n_jobs, verbose=10)(delayed(trial_do_partial)(trial) for trial in range(NUM_TRIALS))
# train_pref = [item[0] for item in result_perf]
# val_pref = [item[1] for item in result_perf]
# test_pref = [item[2] for item in result_perf]


# pbar.clear()
# np.save(results_name_pre + 'train_pref.dt', train_pref)
# np.save(results_name_pre + 'val_pref.dt', val_pref)
# np.save(results_name_pre + 'test_pref.dt', test_pref)
# # ---- direct running, normally uses a single CPU core. ----
# train_pref = []
# val_pref = []
# test_pref = []
# for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):
# o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)
# train_pref.append(o1)
# val_pref.append(o2)
# test_pref.append(o3)

print()
print('4. Getting final performance...')
@@ -479,4 +499,4 @@ def trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, t
test_pref[index_out][index_in] = np.mean(
current_test_perf)

return train_pref, val_pref, test_pref
