1. add Sylvester Equation Methods for the generalized random walk kernel.

2. correct an error in the common walk kernel. DO NOT use the old one. 3. improve the method to construct fully-labeled direct product graphs, which is much faster for sparse graphs.
7 years ago · 6aac5b2981
--- a/notebooks/run_commonwalkkernel.ipynb
+++ b/notebooks/run_commonwalkkernel.ipynb
--- a/notebooks/run_randomwalkkernel.ipynb
+++ b/notebooks/run_randomwalkkernel.ipynb
--- a/notebooks/run_spkernel.ipynb
+++ b/notebooks/run_spkernel.ipynb
--- a/pygraph/kernels/.#commonWalkKernel.py
+++ b/pygraph/kernels/.#commonWalkKernel.py
@@ -0,0 +1 @@
 ljia@ljia-Precision-7520.4716:1530265749
--- a/pygraph/kernels/commonWalkKernel.py
+++ b/pygraph/kernels/commonWalkKernel.py
@@ -24,7 +24,7 @@ def commonwalkkernel(*args,
                     edge_label='bond_type',
                     n=None,
                     weight=1,
                     compute_method='exp'):
                     compute_method=None):
    """Calculate common walk graph kernels up to depth d between graphs.
    Parameters
    ----------
@@ -40,10 +40,11 @@ def commonwalkkernel(*args,
    n : integer
        Longest length of walks.
    weight: integer
        Weight coefficient of different lengths of walks.
        Weight coefficient of different lengths of walks, which represents beta in 'exp' method and gamma in 'geo'.
    compute_method : string
        Method used to compute walk kernel. The Following choices are available:
        'direct' : direct product graph method, as shown in reference [1]. The time complexity is O(n^6) for unlabeled graphs with n vertices.
        'exp' : exponential serial method applied on the direct product graph, as shown in reference [1]. The time complexity is O(n^6) for graphs with n vertices.
        'geo' : geometric serial method applied on the direct product graph, as shown in reference [1]. The time complexity is O(n^6) for graphs with n vertices.
        'brute' : brute force, simply search for all walks and compare them.

    Return
@@ -66,6 +67,8 @@ def commonwalkkernel(*args,
    if not ds_attrs['edge_labeled']:
        for G in Gn:
            nx.set_edge_attributes(G, '0', 'bond_type')
    if not ds_attrs['is_directed']:
        Gn = [G.to_directed() for G in Gn]

    start_time = time.time()

@@ -77,7 +80,7 @@ def commonwalkkernel(*args,
            file=sys.stdout)
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _untilnwalkkernel_exp(Gn[i], Gn[j], node_label,
                Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label,
                                                      edge_label, weight)
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)
@@ -90,7 +93,7 @@ def commonwalkkernel(*args,
            file=sys.stdout)
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _untilnwalkkernel_geo(Gn[i], Gn[j], node_label,
                Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label,
                                                      edge_label, weight)
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)
@@ -106,7 +109,7 @@ def commonwalkkernel(*args,

        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _untilnwalkkernel_brute(
                Kmatrix[i][j] = _commonwalkkernel_brute(
                    all_walks[i],
                    all_walks[j],
                    node_label=node_label,
@@ -122,7 +125,7 @@ def commonwalkkernel(*args,
    return Kmatrix, run_time


 def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta):
 def _commonwalkkernel_exp(G1, G2, node_label, edge_label, beta):
    """Calculate walk graph kernels up to n between 2 graphs using exponential series.

    Parameters
@@ -168,7 +171,7 @@ def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta):
    D = np.zeros((len(ew), len(ew)))
    for i in range(len(ew)):
        D[i][i] = np.exp(beta * ew[i])
    # print('D: ', D)
        # print('D: ', D)
    # print('hshs: ', T.I * D * T)

    # print(np.exp(-2))
@@ -176,16 +179,16 @@ def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta):
    # print(np.exp(weight * D))
    # print(ev)
    # print(np.linalg.inv(ev))
    exp_D = ev * D * ev.I
    exp_D = ev * D * ev.T
    # print(exp_D)
    # print(np.exp(weight * A))
    # print('-------')

    return np.sum(exp_D.diagonal())
    return exp_D.sum()


 def _untilnwalkkernel_geo(G1, G2, node_label, edge_label, gamma):
    """Calculate walk graph kernels up to n between 2 graphs using geometric series.
 def _commonwalkkernel_geo(G1, G2, node_label, edge_label, gamma):
    """Calculate common walk graph kernels up to n between 2 graphs using geometric series.

    Parameters
    ----------
@@ -207,46 +210,14 @@ def _untilnwalkkernel_geo(G1, G2, node_label, edge_label, gamma):
    # get tensor product / direct product
    gp = direct_product(G1, G2, node_label, edge_label)
    A = nx.adjacency_matrix(gp).todense()
    # print(A)

    # from matplotlib import pyplot as plt
    # nx.draw_networkx(G1)
    # plt.show()
    # nx.draw_networkx(G2)
    # plt.show()
    # nx.draw_networkx(gp)
    # plt.show()
    # print(G1.nodes(data=True))
    # print(G2.nodes(data=True))
    # print(gp.nodes(data=True))
    # print(gp.edges(data=True))

    ew, ev = np.linalg.eig(A)
    # print('ew: ', ew)
    # print(ev)
    # T = np.matrix(ev)
    # print('T: ', T)
    # T = ev.I
    D = np.zeros((len(ew), len(ew)))
    for i in range(len(ew)):
        D[i][i] = np.exp(beta * ew[i])
    # print('D: ', D)
    # print('hshs: ', T.I * D * T)

    # print(np.exp(-2))
    # print(D)
    # print(np.exp(weight * D))
    # print(ev)
    # print(np.linalg.inv(ev))
    exp_D = ev * D * ev.I
    # print(exp_D)
    # print(np.exp(weight * A))
    # print('-------')

    return np.sum(exp_D.diagonal())
    mat = np.identity(len(A)) - gamma * A
    try:
        return mat.I.sum()
    except np.linalg.LinAlgError:
        return np.nan


 def _untilnwalkkernel_brute(walks1,
 def _commonwalkkernel_brute(walks1,
                            walks2,
                            node_label='atom',
                            edge_label='bond_type',
--- a/pygraph/kernels/randomWalkKernel.py
+++ b/pygraph/kernels/randomWalkKernel.py
@@ -19,7 +19,11 @@ from pygraph.utils.graphdataset import get_dataset_attributes
 def randomwalkkernel(*args,
                     node_label='atom',
                     edge_label='bond_type',
                     edge_weight=None,
                     h=10,
                     p=None,
                     q=None,
                     weight=None,
                     compute_method=''):
    """Calculate random walk graph kernels.
    Parameters
@@ -33,7 +37,7 @@ def randomwalkkernel(*args,
        node attribute used as label. The default node label is atom.
    edge_label : string
        edge attribute used as label. The default edge label is bond_type.
    n : integer
    h : integer
        Longest length of walks.
    method : string
        Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'.
@@ -46,7 +50,25 @@ def randomwalkkernel(*args,
    compute_method = compute_method.lower()
    h = int(h)
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    Kmatrix = np.zeros((len(Gn), len(Gn)))

    eweight = None
    if edge_weight == None:
        print('\n None edge weight specified. Set all weight to 1.\n')
    else:
        try:
            some_weight = list(
                nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
            if isinstance(some_weight, float) or isinstance(some_weight, int):
                eweight = edge_weight
            else:
                print(
                    '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n'
                    % edge_weight)
        except:
            print(
                '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n'
                % edge_weight)

    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
@@ -71,76 +93,224 @@ def randomwalkkernel(*args,
    #         labeled=labeled) for i in range(0, len(Gn))
    # ]

    pbar = tqdm(
        total=(1 + len(Gn)) * len(Gn) / 2,
        desc='calculating kernels',
        file=sys.stdout)
    if compute_method == 'sylvester':
        import warnings
        warnings.warn(
            'The Sylvester equation (rather than generalized Sylvester equation) is used; only walks of length 1 is considered.'
            'The Sylvester equation (rather than generalized Sylvester equation) is used; edge label number has to smaller than 3.'
        )
        from control import dlyap
        Kmatrix = _randomwalkkernel_sylvester(Gn, weight, p, q, node_label,
                                              edge_label, eweight)

    elif compute_method == 'conjugate':
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _randomwalkkernel_sylvester(
                    all_walks[i],
                    all_walks[j],
                    node_label=node_label,
                    edge_label=edge_label)
                Kmatrix[i][j] = _randomwalkkernel_conjugate(
                    Gn[i], Gn[j], node_label, edge_label)
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)

    elif compute_method == 'conjugate':
        pass
    elif compute_method == 'fp':
        pass
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _randomwalkkernel_fp(Gn[i], Gn[j], node_label,
                                                     edge_label)
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)

    elif compute_method == 'spectral':
        pass
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _randomwalkkernel_spectral(
                    Gn[i], Gn[j], node_label, edge_label)
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)
    elif compute_method == 'kron':
        pass
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j],
                                                       node_label, edge_label)
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)
    else:
        raise Exception(
            'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
        )

    for i in range(0, len(Gn)):
        for j in range(i, len(Gn)):
            Kmatrix[i][j] = _randomwalkkernel_do(
                all_walks[i],
                all_walks[j],
                node_label=node_label,
                edge_label=edge_label,
                labeled=labeled)
            Kmatrix[j][i] = Kmatrix[i][j]
    # for i in range(0, len(Gn)):
    #     for j in range(i, len(Gn)):
    #         Kmatrix[i][j] = _randomwalkkernel_do(
    #             all_walks[i],
    #             all_walks[j],
    #             node_label=node_label,
    #             edge_label=edge_label,
    #             labeled=labeled)
    #         Kmatrix[j][i] = Kmatrix[i][j]

    run_time = time.time() - start_time
    print(
        "\n --- kernel matrix of walk kernel up to %d of size %d built in %s seconds ---"
        % (n, len(Gn), run_time))
        "\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---"
        % (len(Gn), run_time))

    return Kmatrix, run_time


 def _randomwalkkernel_sylvester(walks1,
                                walks2,
                                node_label='atom',
                                edge_label='bond_type'):
 def _randomwalkkernel_sylvester(Gn, lmda, p, q, node_label, edge_label,
                                eweight):
    """Calculate walk graph kernels up to n between 2 graphs using Sylvester method.

    Parameters
    ----------
    walks1, walks2 : list
        List of walks in 2 graphs, where for unlabeled graphs, each walk is represented by a list of nodes; while for labeled graphs, each walk is represented by a string consists of labels of nodes and edges on that walk.
    G1, G2 : NetworkX graph
        Graphs between which the kernel is calculated.
    node_label : string
        node attribute used as label. The default node label is atom.
        node attribute used as label.
    edge_label : string
        edge attribute used as label. The default edge label is bond_type.
        edge attribute used as label.

    Return
    ------
    kernel : float
        Kernel between 2 graphs.
    """
    from control import dlyap
    Kmatrix = np.zeros((len(Gn), len(Gn)))

    if q == None:
        # don't normalize adjacency matrices if q is a uniform vector.
        A_list = [
            nx.adjacency_matrix(G, eweight).todense() for G in tqdm(
                Gn, desc='compute adjacency matrices', file=sys.stdout)
        ]
        if p == None:
            pbar = tqdm(
                total=(1 + len(Gn)) * len(Gn) / 2,
                desc='calculating kernels',
                file=sys.stdout)
            for i in range(0, len(Gn)):
                for j in range(i, len(Gn)):
                    A = lmda * A_list[j]
                    Q = A_list[i]
                    # use uniform distribution if there is no prior knowledge.
                    nb_pd = len(A_list[i]) * len(A_list[j])
                    pd_uni = 1 / nb_pd
                    C = np.full((len(A_list[j]), len(A_list[i])), pd_uni)
                    try:
                        X = dlyap(A, Q, C)
                        X = np.reshape(X, (-1, 1), order='F')
                        # use uniform distribution if there is no prior knowledge.
                        q_direct = np.full((1, nb_pd), pd_uni)
                        Kmatrix[i][j] = np.dot(q_direct, X)
                    except TypeError:
                        # print('sth wrong.')
                        Kmatrix[i][j] = np.nan

                    Kmatrix[j][i] = Kmatrix[i][j]
                    pbar.update(1)
    # A_list = []
    # for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
    #     A_tilde = nx.adjacency_matrix(G, weight=None).todense()
    #     # normalized adjacency matrices
    #     #          A_list.append(A_tilde / A_tilde.sum(axis=0))
    #     A_list.append(A_tilde)

    return Kmatrix


 def _randomwalkkernel_conjugate(G1, G2, node_label, edge_label):
    """Intended to calculate the random walk kernel between 2 graphs using the 'conjugate' method.

    NOTE(review): the visible body is a placeholder. It builds the direct
    product graph of the two inputs, draws the graphs for inspection, and
    then refers to names (`A`, `Q`, `C`, `kernel`) that are never assigned
    inside this function. `node_label` and `edge_label` are accepted but not
    used by the visible code.

    Parameters
    ----------
    G1, G2 : NetworkX graph
        Graphs between which the kernel is calculated.
    node_label : string
        node attribute used as label (unused in the visible body).
    edge_label : string
        edge attribute used as label (unused in the visible body).

    Return
    ------
    kernel : float
        Kernel between 2 graphs.
    """

    dpg = nx.tensor_product(G1, G2)  # direct product graph
    # Visual debugging aid: draw both input graphs and their product graph,
    # showing one figure after each draw call.
    import matplotlib.pyplot as plt
    nx.draw_networkx(G1)
    plt.show()
    nx.draw_networkx(G2)
    plt.show()
    nx.draw_networkx(dpg)
    plt.show()
    # NOTE(review): `A`, `Q` and `C` are not assigned in this function and
    # `dlyap` is not imported here -- unless these exist at module level this
    # line raises NameError. TODO confirm and implement the actual solver.
    X = dlyap(A, Q, C)

    # NOTE(review): `kernel` is never assigned in this function, and `X` is
    # computed above but not used.
    return kernel


 def _randomwalkkernel_fp(G1, G2, node_label, edge_label):
    """Intended to calculate the random walk kernel between 2 graphs using the Fixed-Point method.

    NOTE(review): the visible body is a placeholder. It only builds the
    direct product graph and then refers to names (`A`, `Q`, `C`, `kernel`)
    that are never assigned inside this function. `node_label` and
    `edge_label` are accepted but not used by the visible code.

    Parameters
    ----------
    G1, G2 : NetworkX graph
        Graphs between which the kernel is calculated.
    node_label : string
        node attribute used as label (unused in the visible body).
    edge_label : string
        edge attribute used as label (unused in the visible body).

    Return
    ------
    kernel : float
        Kernel between 2 graphs.
    """

    dpg = nx.tensor_product(G1, G2)  # direct product graph
    # NOTE(review): `A`, `Q` and `C` are not assigned in this function and
    # `dlyap` is not imported here -- unless these exist at module level this
    # line raises NameError. `dpg` is not used afterwards. TODO confirm.
    X = dlyap(A, Q, C)

    # NOTE(review): `kernel` is never assigned in this function, and `X` is
    # computed above but not used.
    return kernel


 def _randomwalkkernel_spectral(G1, G2, node_label, edge_label):
    """Intended to calculate the random walk kernel between 2 graphs using the spectral decomposition method.

    NOTE(review): the visible body is a placeholder. It only builds the
    direct product graph and then refers to names (`A`, `Q`, `C`, `kernel`)
    that are never assigned inside this function. `node_label` and
    `edge_label` are accepted but not used by the visible code.

    Parameters
    ----------
    G1, G2 : NetworkX graph
        Graphs between which the kernel is calculated.
    node_label : string
        node attribute used as label (unused in the visible body).
    edge_label : string
        edge attribute used as label (unused in the visible body).

    Return
    ------
    kernel : float
        Kernel between 2 graphs.
    """

    dpg = nx.tensor_product(G1, G2)  # direct product graph
    # NOTE(review): `A`, `Q` and `C` are not assigned in this function and
    # `dlyap` is not imported here -- unless these exist at module level this
    # line raises NameError. `dpg` is not used afterwards. TODO confirm.
    X = dlyap(A, Q, C)

    # NOTE(review): `kernel` is never assigned in this function, and `X` is
    # computed above but not used.
    return kernel


 def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
    """Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.

    Parameters
    ----------
    G1, G2 : NetworkX graph
        Graphs between which the kernel is calculated.
    node_label : string
        node attribute used as label.
    edge_label : string
        edge attribute used as label.

    Return
    ------
    kernel : float
        Treelet Kernel between 2 graphs.
        Kernel between 2 graphs.
    """

    dpg = nx.tensor_product(G1, G2)  # direct product graph
--- a/pygraph/kernels/spKernel.py
+++ b/pygraph/kernels/spKernel.py
@@ -8,6 +8,7 @@ import pathlib
 sys.path.insert(0, "../")
 from tqdm import tqdm
 import time
 from itertools import combinations_with_replacement, product

 import networkx as nx
 import numpy as np
@@ -39,8 +40,6 @@ def spkernel(*args, node_label='atom', edge_weight=None, node_kernels=None):
    # pre-process
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]

    Gn = [nx.to_directed(G) for G in Gn]

    weight = None
    if edge_weight == None:
        print('\n None edge weight specified. Set all weight to 1.\n')
@@ -89,174 +88,158 @@ def spkernel(*args, node_label='atom', edge_weight=None, node_kernels=None):
        # node symb and non-synb labeled
        if ds_attrs['node_attr_dim'] > 0:
            if ds_attrs['is_directed']:
                for i in range(0, len(Gn)):
                    for j in range(i, len(Gn)):
                        for e1 in Gn[i].edges(data=True):
                            for e2 in Gn[j].edges(data=True):
                                if e1[2]['cost'] == e2[2]['cost']:
                                    kn = node_kernels['mix']
                                    try:
                                        n11, n12, n21, n22 = Gn[i].nodes[e1[
                                            0]], Gn[i].nodes[e1[1]], Gn[
                                                j].nodes[e2[0]], Gn[j].nodes[
                                                    e2[1]]
                                        kn1 = kn(n11[node_label], n21[
                                            node_label], [n11['attributes']],
                                                 [n21['attributes']]) * kn(
                                                     n12[node_label],
                                                     n22[node_label],
                                                     [n12['attributes']],
                                                     [n22['attributes']])
                                        Kmatrix[i][j] += kn1
                                    except KeyError:  # missing labels or attributes
                                        pass
                        Kmatrix[j][i] = Kmatrix[i][j]
                        pbar.update(1)
                for i, j in combinations_with_replacement(
                        range(0, len(Gn)), 2):
                    for e1, e2 in product(
                            Gn[i].edges(data=True), Gn[j].edges(data=True)):
                        if e1[2]['cost'] == e2[2]['cost']:
                            kn = node_kernels['mix']
                            try:
                                n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
                                    i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
                                        j].nodes[e2[1]]
                                kn1 = kn(n11[node_label], n21[node_label], [
                                    n11['attributes']
                                ], [n21['attributes']]) * kn(
                                    n12[node_label], n22[node_label],
                                    [n12['attributes']], [n22['attributes']])
                                Kmatrix[i][j] += kn1
                            except KeyError:  # missing labels or attributes
                                pass
                    Kmatrix[j][i] = Kmatrix[i][j]
                    pbar.update(1)

            else:
                for i in range(0, len(Gn)):
                    for j in range(i, len(Gn)):
                        for e1 in Gn[i].edges(data=True):
                            for e2 in Gn[j].edges(data=True):
                                if e1[2]['cost'] == e2[2]['cost']:
                                    kn = node_kernels['mix']
                                    try:
                                        # each edge walk is counted twice, starting from both its extreme nodes.
                                        n11, n12, n21, n22 = Gn[i].nodes[e1[
                                            0]], Gn[i].nodes[e1[1]], Gn[
                                                j].nodes[e2[0]], Gn[j].nodes[
                                                    e2[1]]
                                        kn1 = kn(n11[node_label], n21[
                                            node_label], [n11['attributes']],
                                                 [n21['attributes']]) * kn(
                                                     n12[node_label],
                                                     n22[node_label],
                                                     [n12['attributes']],
                                                     [n22['attributes']])
                                        kn2 = kn(n11[node_label], n22[
                                            node_label], [n11['attributes']],
                                                 [n22['attributes']]) * kn(
                                                     n12[node_label],
                                                     n21[node_label],
                                                     [n12['attributes']],
                                                     [n21['attributes']])
                                        Kmatrix[i][j] += kn1 + kn2
                                    except KeyError:  # missing labels or attributes
                                        pass
                        Kmatrix[j][i] = Kmatrix[i][j]
                        pbar.update(1)
                for i, j in combinations_with_replacement(
                        range(0, len(Gn)), 2):
                    for e1, e2 in product(
                            Gn[i].edges(data=True), Gn[j].edges(data=True)):
                        if e1[2]['cost'] == e2[2]['cost']:
                            kn = node_kernels['mix']
                            try:
                                # each edge walk is counted twice, starting from both its extreme nodes.
                                n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
                                    i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
                                        j].nodes[e2[1]]
                                kn1 = kn(n11[node_label], n21[node_label], [
                                    n11['attributes']
                                ], [n21['attributes']]) * kn(
                                    n12[node_label], n22[node_label],
                                    [n12['attributes']], [n22['attributes']])
                                kn2 = kn(n11[node_label], n22[node_label], [
                                    n11['attributes']
                                ], [n22['attributes']]) * kn(
                                    n12[node_label], n21[node_label],
                                    [n12['attributes']], [n21['attributes']])
                                Kmatrix[i][j] += kn1 + kn2
                            except KeyError:  # missing labels or attributes
                                pass
                    Kmatrix[j][i] = Kmatrix[i][j]
                    pbar.update(1)
        # node symb labeled
        else:
            if ds_attrs['is_directed']:
                for i in range(0, len(Gn)):
                    for j in range(i, len(Gn)):
                        for e1 in Gn[i].edges(data=True):
                            for e2 in Gn[j].edges(data=True):
                                if e1[2]['cost'] == e2[2]['cost']:
                                    kn = node_kernels['symb']
                                    try:
                                        n11, n12, n21, n22 = Gn[i].nodes[e1[
                                            0]], Gn[i].nodes[e1[1]], Gn[
                                                j].nodes[e2[0]], Gn[j].nodes[
                                                    e2[1]]
                                        kn1 = kn(n11[node_label],
                                                 n21[node_label]) * kn(
                                                     n12[node_label],
                                                     n22[node_label])
                                        Kmatrix[i][j] += kn1
                                    except KeyError:  # missing labels
                                        pass
                        Kmatrix[j][i] = Kmatrix[i][j]
                        pbar.update(1)
                for i, j in combinations_with_replacement(
                        range(0, len(Gn)), 2):
                    for e1, e2 in product(
                            Gn[i].edges(data=True), Gn[j].edges(data=True)):
                        if e1[2]['cost'] == e2[2]['cost']:
                            kn = node_kernels['symb']
                            try:
                                n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
                                    i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
                                        j].nodes[e2[1]]
                                kn1 = kn(n11[node_label],
                                         n21[node_label]) * kn(
                                             n12[node_label], n22[node_label])
                                Kmatrix[i][j] += kn1
                            except KeyError:  # missing labels
                                pass
                    Kmatrix[j][i] = Kmatrix[i][j]
                    pbar.update(1)

            else:
                for i in range(0, len(Gn)):
                    for j in range(i, len(Gn)):
                        for e1 in Gn[i].edges(data=True):
                            for e2 in Gn[j].edges(data=True):
                                if e1[2]['cost'] == e2[2]['cost']:
                                    kn = node_kernels['symb']
                                    try:
                                        # each edge walk is counted twice, starting from both its extreme nodes.
                                        n11, n12, n21, n22 = Gn[i].nodes[e1[
                                            0]], Gn[i].nodes[e1[1]], Gn[
                                                j].nodes[e2[0]], Gn[j].nodes[
                                                    e2[1]]
                                        kn1 = kn(n11[node_label],
                                                 n21[node_label]) * kn(
                                                     n12[node_label],
                                                     n22[node_label])
                                        kn2 = kn(n11[node_label],
                                                 n22[node_label]) * kn(
                                                     n12[node_label],
                                                     n21[node_label])
                                        Kmatrix[i][j] += kn1 + kn2
                                    except KeyError:  # missing labels
                                        pass
                        Kmatrix[j][i] = Kmatrix[i][j]
                        pbar.update(1)
                for i, j in combinations_with_replacement(
                        range(0, len(Gn)), 2):
                    for e1, e2 in product(
                            Gn[i].edges(data=True), Gn[j].edges(data=True)):
                        if e1[2]['cost'] == e2[2]['cost']:
                            kn = node_kernels['symb']
                            try:
                                # each edge walk is counted twice, starting from both its extreme nodes.
                                n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
                                    i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
                                        j].nodes[e2[1]]
                                kn1 = kn(n11[node_label],
                                         n21[node_label]) * kn(
                                             n12[node_label], n22[node_label])
                                kn2 = kn(n11[node_label],
                                         n22[node_label]) * kn(
                                             n12[node_label], n21[node_label])
                                Kmatrix[i][j] += kn1 + kn2
                            except KeyError:  # missing labels
                                pass
                    Kmatrix[j][i] = Kmatrix[i][j]
                    pbar.update(1)
    else:
        # node non-synb labeled
        if ds_attrs['node_attr_dim'] > 0:
            if ds_attrs['is_directed']:
                for i in range(0, len(Gn)):
                    for j in range(i, len(Gn)):
                        for e1 in Gn[i].edges(data=True):
                            for e2 in Gn[j].edges(data=True):
                                if e1[2]['cost'] == e2[2]['cost']:
                                    kn = node_kernels['nsymb']
                                    try:
                                        # each edge walk is counted twice, starting from both its extreme nodes.
                                        n11, n12, n21, n22 = Gn[i].nodes[e1[
                                            0]], Gn[i].nodes[e1[1]], Gn[
                                                j].nodes[e2[0]], Gn[j].nodes[
                                                    e2[1]]
                                        kn1 = kn([n11['attributes']],
                                                 [n21['attributes']]) * kn(
                                                     [n12['attributes']],
                                                     [n22['attributes']])
                                        Kmatrix[i][j] += kn1
                                    except KeyError:  # missing attributes
                                        pass
                        Kmatrix[j][i] = Kmatrix[i][j]
                        pbar.update(1)
                for i, j in combinations_with_replacement(
                        range(0, len(Gn)), 2):
                    for e1, e2 in product(
                            Gn[i].edges(data=True), Gn[j].edges(data=True)):
                        if e1[2]['cost'] == e2[2]['cost']:
                            kn = node_kernels['nsymb']
                            try:
                                # each edge walk is counted twice, starting from both its extreme nodes.
                                n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
                                    i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
                                        j].nodes[e2[1]]
                                kn1 = kn([n11['attributes']],
                                         [n21['attributes']]) * kn(
                                             [n12['attributes']],
                                             [n22['attributes']])
                                Kmatrix[i][j] += kn1
                            except KeyError:  # missing attributes
                                pass
                    Kmatrix[j][i] = Kmatrix[i][j]
                    pbar.update(1)
            else:
                for i in range(0, len(Gn)):
                    for j in range(i, len(Gn)):
                        for e1 in Gn[i].edges(data=True):
                            for e2 in Gn[j].edges(data=True):
                                if e1[2]['cost'] == e2[2]['cost']:
                                    kn = node_kernels['nsymb']
                                    try:
                                        # each edge walk is counted twice, starting from both its extreme nodes.
                                        n11, n12, n21, n22 = Gn[i].nodes[e1[
                                            0]], Gn[i].nodes[e1[1]], Gn[
                                                j].nodes[e2[0]], Gn[j].nodes[
                                                    e2[1]]
                                        kn1 = kn([n11['attributes']],
                                                 [n21['attributes']]) * kn(
                                                     [n12['attributes']],
                                                     [n22['attributes']])
                                        kn2 = kn([n11['attributes']],
                                                 [n22['attributes']]) * kn(
                                                     [n12['attributes']],
                                                     [n21['attributes']])
                                        Kmatrix[i][j] += kn1 + kn2
                                    except KeyError:  # missing attributes
                                        pass
                        Kmatrix[j][i] = Kmatrix[i][j]
                        pbar.update(1)
                for i, j in combinations_with_replacement(
                        range(0, len(Gn)), 2):
                    for e1, e2 in product(
                            Gn[i].edges(data=True), Gn[j].edges(data=True)):
                        if e1[2]['cost'] == e2[2]['cost']:
                            kn = node_kernels['nsymb']
                            try:
                                # each edge walk is counted twice, starting from both its extreme nodes.
                                n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
                                    i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
                                        j].nodes[e2[1]]
                                kn1 = kn([n11['attributes']],
                                         [n21['attributes']]) * kn(
                                             [n12['attributes']],
                                             [n22['attributes']])
                                kn2 = kn([n11['attributes']],
                                         [n22['attributes']]) * kn(
                                             [n12['attributes']],
                                             [n21['attributes']])
                                Kmatrix[i][j] += kn1 + kn2
                            except KeyError:  # missing attributes
                                pass
                    Kmatrix[j][i] = Kmatrix[i][j]
                    pbar.update(1)

        # node unlabeled
        else:
            for i in range(0, len(Gn)):
                for j in range(i, len(Gn)):
                    for e1 in Gn[i].edges(data=True):
                        for e2 in Gn[j].edges(data=True):
                            if e1[2]['cost'] == e2[2]['cost']:
                                Kmatrix[i][j] += 1
                    Kmatrix[j][i] = Kmatrix[i][j]
                    pbar.update(1)
            for i, j in combinations_with_replacement(range(0, len(Gn)), 2):
                for e1, e2 in product(
                        Gn[i].edges(data=True), Gn[j].edges(data=True)):
                    if e1[2]['cost'] == e2[2]['cost']:
                        Kmatrix[i][j] += 1
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)

    run_time = time.time() - start_time
    print(
--- a/pygraph/utils/utils.py
+++ b/pygraph/utils/utils.py
@@ -119,7 +119,7 @@ def untotterTransformation(G, node_label, edge_label):


 def direct_product(G1, G2, node_label, edge_label):
    """Return the direct/tensor product of G1 and G2.
    """Return the direct/tensor product of directed graphs G1 and G2.

    Parameters
    ----------
@@ -137,7 +137,7 @@ def direct_product(G1, G2, node_label, edge_label):

    Notes
    -----
    This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to direct product graph.
    This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph.

    References
    ----------
@@ -147,25 +147,37 @@ def direct_product(G1, G2, node_label, edge_label):
    from itertools import product

    # G = G.to_directed()
    gt = nx.Graph()
    gt = nx.DiGraph()
    # add nodes
    for u, v in product(G1, G2):
        if G1.nodes[u][node_label] == G2.nodes[v][node_label]:
            gt.add_node((u, v))
            gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]})
    # add edges
    for u, v in product(gt, gt):
        if (u[0], v[0]) in G1.edges and (
                u[1], v[1]
        ) in G2.edges and G1.edges[u[0],
                                   v[0]][edge_label] == G2.edges[u[1],
                                                                 v[1]][edge_label]:
            gt.add_edge((u[0], u[1]), (v[0], v[1]))
            gt.edges[(u[0], u[1]), (v[0], v[1])].update({
    # add edges; faster for sparse graphs (not so many edges), which is the most common case for now.
    for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
        if (u1, u2) in gt and (
                v1, v2
        ) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2,
                                                             v2][edge_label]:
            gt.add_edge((u1, u2), (v1, v2))
            gt.edges[(u1, u2), (v1, v2)].update({
                edge_label:
                G1.edges[u[0], v[0]][edge_label]
                G1.edges[u1, v1][edge_label]
            })

    # # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
    # for u, v in product(gt, gt):
    #     if (u[0], v[0]) in G1.edges and (
    #             u[1], v[1]
    #     ) in G2.edges and G1.edges[u[0],
    #                                v[0]][edge_label] == G2.edges[u[1],
    #                                                              v[1]][edge_label]:
    #         gt.add_edge((u[0], u[1]), (v[0], v[1]))
    #         gt.edges[(u[0], u[1]), (v[0], v[1])].update({
    #             edge_label:
    #             G1.edges[u[0], v[0]][edge_label]
    #         })

    # relabel nodes using consecutive integers for convenience of kernel calculation.
    # gt = nx.convert_node_labels_to_integers(
    #     gt, first_label=0, label_attribute='label_orignal')