"""
@author: linlin
@references:
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    [2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
"""

import sys
import pathlib
sys.path.insert(0, "../")
import time

import networkx as nx
import numpy as np
from matplotlib import pyplot as plt

from tqdm import tqdm
tqdm.monitor_interval = 0

from pygraph.kernels.deltaKernel import deltakernel
from pygraph.utils.utils import untotterTransformation


def marginalizedkernel(*args,
                       node_label='atom',
                       edge_label='bond_type',
                       p_quit=0.5,
                       itr=20,
                       remove_totters=True):
    """Calculate marginalized graph kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        node attribute used as label. The default node label is atom.
    edge_label : string
        edge attribute used as label. The default edge label is bond_type.
    p_quit : float
        the termination probability in the random walks generating step.
        The default value is 0.5.
    itr : integer
        iteration bound used to calculate R_inf. The default value is 20.
    remove_totters : boolean
        whether to remove totters. When true, every graph is first passed
        through ``untotterTransformation``. The default value is True.

    Return
    ------
    Kmatrix : Numpy matrix
        Symmetric kernel matrix, each element of which is the marginalized
        kernel between 2 graphs.
    run_time : float
        Wall-clock time in seconds spent building the matrix (including the
        optional totter removal).
    """
    # arrange all graphs in a list
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    num_graphs = len(Gn)
    Kmatrix = np.zeros((num_graphs, num_graphs))

    start_time = time.time()

    if remove_totters:
        Gn = [untotterTransformation(G, node_label, edge_label) for G in Gn]

    # Only the upper triangle (diagonal included) is computed, which is
    # n * (n + 1) / 2 pairs; that product is always even, so integer
    # division is exact and keeps the progress bar total an int.
    num_pairs = num_graphs * (num_graphs + 1) // 2

    # The context manager guarantees the bar is closed, even if a kernel
    # computation raises.
    with tqdm(total=num_pairs, desc='calculate kernels',
              file=sys.stdout) as pbar:
        for i in range(num_graphs):
            for j in range(i, num_graphs):
                Kmatrix[i][j] = _marginalizedkernel_do(
                    Gn[i], Gn[j], node_label, edge_label, p_quit, itr)
                # the kernel is symmetric, mirror into the lower triangle
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)

    run_time = time.time() - start_time
    print(
        "\n --- marginalized kernel matrix of size %d built in %s seconds ---"
        % (num_graphs, run_time))

    return Kmatrix, run_time


def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):
    """Calculate marginalized graph kernel between 2 graphs.

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated. Node identifiers
        may be any hashable objects; they are mapped to matrix indices
        internally.
    node_label : string
        node attribute used as label.
    edge_label : string
        edge attribute used as label.
    p_quit : float
        the termination probability in the random walks generating step.
    itr : integer
        iteration bound used to calculate R_inf. The update loop runs over
        ``range(1, itr)``, i.e. ``itr - 1`` sweeps.

    Return
    ------
    kernel : float
        Marginalized Kernel between 2 graphs. 0.0 is returned when either
        graph has no nodes.
    """
    num_nodes_G1 = nx.number_of_nodes(G1)
    num_nodes_G2 = nx.number_of_nodes(G2)
    if num_nodes_G1 == 0 or num_nodes_G2 == 0:
        # No start node exists, so the sum over node pairs is empty; this
        # also avoids dividing by zero in the initial distribution below.
        return 0.0

    # the initial probability distribution in the random walks generating
    # step (uniform distribution over |G|)
    p_init_G1 = 1 / num_nodes_G1
    p_init_G2 = 1 / num_nodes_G2

    r1 = p_quit * p_quit

    # Snapshot nodes once. Labels are read from the (node, data) pairs so
    # the code does not depend on ``G.node``, which NetworkX 2.4 removed.
    nodes_G1 = list(G1.nodes(data=True))
    nodes_G2 = list(G2.nodes(data=True))
    labels_G1 = {node: data[node_label] for node, data in nodes_G1}
    labels_G2 = {node: data[node_label] for node, data in nodes_G2}
    # Map node identifiers to row / column positions so R_inf can be
    # indexed even when nodes are not the integers 0..n-1.
    index_G1 = {node: idx for idx, (node, _data) in enumerate(nodes_G1)}
    index_G2 = {node: idx for idx, (node, _data) in enumerate(nodes_G2)}

    # matrix to save all the R_inf for all pairs of nodes
    R_inf = np.zeros((num_nodes_G1, num_nodes_G2))

    # calculate R_inf with a simple iterative method
    for _sweep in range(1, itr):
        R_inf_new = np.full((num_nodes_G1, num_nodes_G2), r1, dtype=float)

        # calculate R_inf for each pair of nodes
        for node1, _data1 in nodes_G1:
            neighbor_n1 = G1[node1]
            degree_n1 = len(neighbor_n1)
            if degree_n1 == 0:
                # An isolated node has no transitions: its entries keep the
                # base value r1 (and dividing by its degree is avoided).
                continue
            # the transition probability distribution in the random walks
            # generating step (uniform distribution over the vertices
            # adjacent to the current vertex)
            p_trans_n1 = (1 - p_quit) / degree_n1
            row = index_G1[node1]

            for node2, _data2 in nodes_G2:
                neighbor_n2 = G2[node2]
                degree_n2 = len(neighbor_n2)
                if degree_n2 == 0:
                    continue
                p_trans_n2 = (1 - p_quit) / degree_n2
                col = index_G2[node2]

                for neighbor1 in neighbor_n1:
                    for neighbor2 in neighbor_n2:
                        t = p_trans_n1 * p_trans_n2 * \
                            deltakernel(labels_G1[neighbor1] == labels_G2[neighbor2]) * \
                            deltakernel(neighbor_n1[neighbor1][edge_label] == neighbor_n2[neighbor2][edge_label])

                        # ref [1] equation (8)
                        R_inf_new[row, col] += t * R_inf[
                            index_G1[neighbor1], index_G2[neighbor2]]
        R_inf = R_inf_new

    # add elements of R_inf up and calculate kernel
    kernel = 0
    for node1, _data1 in nodes_G1:
        row = index_G1[node1]
        for node2, _data2 in nodes_G2:
            s = p_init_G1 * p_init_G2 * deltakernel(
                labels_G1[node1] == labels_G2[node2])
            kernel += s * R_inf[row, index_G2[node2]]  # ref [1] equation (6)

    return kernel