initial

7 years ago · 51911e8bc9
--- a/py-graph/init.py
+++ b/py-graph/init.py
@@ -1,21 +0,0 @@
 # -*-coding:utf-8 -*-
 """
 Pygraph

 This  package contains 4 sub  packages :
        * c_ext : binders to C++ code
        * ged : allows to compute graph edit distance between networkX graphs
        * kernels : computation of graph kernels, ie graph similarity measure compatible with SVM
        * notebooks : examples of code using this library
        * utils : Diverse computation on graphs
 """

 # info
 __version__ = "0.1"
 __author__  = "Benoit Gaüzère"
 __date__    = "November 2017"
 
 # import sub modules
 from pygraph import c_ext
 from pygraph import ged
 from pygraph import utils
--- a/py-graph/kernels/marginalizedKernel.py
+++ b/py-graph/kernels/marginalizedKernel.py
@@ -1,116 +0,0 @@
 import sys
 import pathlib
 sys.path.insert(0, "../")

 import networkx as nx
 import numpy as np
 import time

 def marginalizedkernel(*args):
    """Calculate marginalized graph kernels between graphs.
    
    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    p_quit : integer
        the termination probability in the random walks generating step
    itr : integer
        time of iterations to calculate R_inf
        
    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the marginalized kernel between 2 praphs. / Marginalized Kernel between 2 graphs.
        
    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    """
    if len(args) == 3: # for a list of graphs
        Gn = args[0]

        Kmatrix = np.zeros((len(Gn), len(Gn)))

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = marginalizedkernel(Gn[i], Gn[j], args[1], args[2])
                Kmatrix[j][i] = Kmatrix[i][j]
                
        print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
        
        return Kmatrix
        
    else: # for only 2 graphs
        
        # init parameters
        G1 = args[0]
        G2 = args[1]
        p_quit = args[2] # the termination probability in the random walks generating step
        itr = args[3] # time of iterations to calculate R_inf
        
        kernel = 0
        num_nodes_G1 = nx.number_of_nodes(G1)
        num_nodes_G2 = nx.number_of_nodes(G2)
        p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)
        p_init_G2 = 1 / num_nodes_G2
        
        q = p_quit * p_quit
        r1 = q
        
        # initial R_inf
        R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes
        
        # calculate R_inf with a simple interative method
        for i in range(1, itr):
            R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
            R_inf_new.fill(r1)

            # calculate R_inf for each pair of nodes
            for node1 in G1.nodes(data = True):
                neighbor_n1 = G1[node1[0]]
                p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)
                for node2 in G2.nodes(data = True):
                    neighbor_n2 = G2[node2[0]]
                    p_trans_n2 = (1 - p_quit) / len(neighbor_n2)    

                    for neighbor1 in neighbor_n1:
                        for neighbor2 in neighbor_n2:
                            
                            t = p_trans_n1 * p_trans_n2 * \
                                deltaKernel(G1.node[neighbor1]['label'] == G2.node[neighbor2]['label']) * \
                                deltaKernel(neighbor_n1[neighbor1]['label'] == neighbor_n2[neighbor2]['label'])
                            R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8)

            R_inf[:] = R_inf_new
        
        # add elements of R_inf up and calculate kernel
        for node1 in G1.nodes(data = True):
            for node2 in G2.nodes(data = True):                
                s = p_init_G1 * p_init_G2 * deltaKernel(node1[1]['label'] == node2[1]['label'])
                kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
        
        return kernel
    
 def deltaKernel(condition):
    """Return 1 if condition holds, 0 otherwise.
    
    Parameters
    ----------
    condition : Boolean
        A condition, according to which the kernel is set to 1 or 0.
        
    Return
    ------
    Kernel : integer
        Delta Kernel.
        
    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    """
    return (1 if condition else 0)
--- a/py-graph/kernels/pathKernel.py
+++ b/py-graph/kernels/pathKernel.py
@@ -1,68 +0,0 @@
 import sys
 import pathlib
 sys.path.insert(0, "../")


 import networkx as nx
 import numpy as np
 import time

 from utils.utils import getSPGraph


 def spkernel(*args):
    """Calculate shortest-path kernels between graphs.
    
    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
        
    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP Kernel between 2 graphs.
        
    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1: # for a list of graphs
        Gn = args[0]
        
        Kmatrix = np.zeros((len(Gn), len(Gn)))
    
        Sn = [] # get shortest path graphs of Gn
        for i in range(0, len(Gn)):
            Sn.append(getSPGraph(Gn[i]))

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Sn[i].edges(data = True):
                    for e2 in Sn[j].edges(data = True):          
                        if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            Kmatrix[i][j] += 1
                            Kmatrix[j][i] += (0 if i == j else 1)

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
        
        return Kmatrix
        
    else: # for only 2 graphs
        G1 = args[0]
        G2 = args[1]
        
        kernel = 0
        
        for e1 in G1.edges(data = True):
            for e2 in G2.edges(data = True):          
                if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                    kernel += 1

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))
        
        return kernel
--- a/py-graph/kernels/spkernel.py
+++ b/py-graph/kernels/spkernel.py
@@ -1,68 +0,0 @@
 import sys
 import pathlib
 sys.path.insert(0, "../")


 import networkx as nx
 import numpy as np
 import time

 from utils.utils import getSPGraph


 def spkernel(*args):
    """Calculate shortest-path kernels between graphs.
    
    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
        
    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP Kernel between 2 graphs.
        
    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1: # for a list of graphs
        Gn = args[0]
        
        Kmatrix = np.zeros((len(Gn), len(Gn)))
    
        Sn = [] # get shortest path graphs of Gn
        for i in range(0, len(Gn)):
            Sn.append(getSPGraph(Gn[i]))

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Sn[i].edges(data = True):
                    for e2 in Sn[j].edges(data = True):          
                        if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            Kmatrix[i][j] += 1
                            Kmatrix[j][i] += (0 if i == j else 1)

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
        
        return Kmatrix
        
    else: # for only 2 graphs
        G1 = args[0]
        G2 = args[1]
        
        kernel = 0
        
        for e1 in G1.edges(data = True):
            for e2 in G2.edges(data = True):          
                if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                    kernel += 1

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))
        
        return kernel
--- a/py-graph/utils/init.py
+++ b/py-graph/utils/init.py
@@ -1,17 +0,0 @@
 # -*-coding:utf-8 -*-
 """Pygraph - utils module

 Implement some methods to manage graphs
 graphfiles.py : load .gxl and .ct files
 utils.py : compute some properties on networkX graphs


 """

 # info
 __version__ = "0.1"
 __author__ = "Benoit Gaüzère"
 __date__ = "November 2017"

 from utils import graphfiles
 from utils import utils
--- a/py-graph/utils/graphfiles.py
+++ b/py-graph/utils/graphfiles.py
@@ -1,87 +0,0 @@
 import networkx as nx
   
 def loadCT(filename):
    """load data from .ct file.
    
    Notes
    ------ 
    a typical example of data in .ct is like this:
    
     3 2  <- number of nodes and edges
        0.0000    0.0000    0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element @Q what are the first 3 numbers?
        0.0000    0.0000    0.0000 C
        0.0000    0.0000    0.0000 O
      1  3  1  1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. @Q what are the 3th numbers?
      2  3  1  1
    """
    content = open(filename).read().splitlines()
    G = nx.Graph(name=str(content[0])) # set name of the graph
    tmp = content[1].split(" ")
    if tmp[0] == '':
        nb_nodes = int(tmp[1]) # number of the nodes
        nb_edges = int(tmp[2]) # number of the edges
    else:
        nb_nodes = int(tmp[0])
        nb_edges = int(tmp[1])

    for i in range(0, nb_nodes):
        tmp = content[i + 2].split(" ")
        tmp = [x for x in tmp if x != '']
        G.add_node(i, label=tmp[3])

    for i in range(0, nb_edges):
        tmp = content[i + G.number_of_nodes() + 2].split(" ")
        tmp = [x for x in tmp if x != '']
        G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3]))
    return G


 def loadGXL(filename):
    import networkx as nx
    import xml.etree.ElementTree as ET

    tree = ET.parse(filename)
    root = tree.getroot()
    index = 0
    G = nx.Graph()
    dic={}
    for node in root.iter('node'):
        label = node.find('attr')[0].text
        dic[node.attrib['id']] = index
        G.add_node(index, id=node.attrib['id'], label=label)
        index += 1
        
    for edge in root.iter('edge'):
        label = edge.find('attr')[0].text
        G.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], label=label)
    return G
 
 def loadDataset(filename):
    """load file list of the dataset.
    """
    from os.path import dirname, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if(extension == "ds"):
        content = open(filename).read().splitlines()
        for i in range(0, len(content)):
            tmp = content[i].split(' ')
            data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the '#'s in file names
            y.append(float(tmp[1]))
    elif(extension == "cxl"):
        import xml.etree.ElementTree as ET

        tree = ET.parse(filename)
        root = tree.getroot()
        data = []
        y = []
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(dirname_dataset + '/' + mol_filename))
            y.append(mol_class)

    return data, y
--- a/py-graph/utils/utils.py
+++ b/py-graph/utils/utils.py
@@ -1,59 +0,0 @@
 import networkx as nx
 import numpy as np


 def getSPLengths(G1):
    sp = nx.shortest_path(G1)
    distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
    for i in np.keys():
        for j in np[i].keys():
            distances[i, j] = len(sp[i][j])-1
    return distances

 def getSPGraph(G):
    """Transform graph G to its corresponding shortest-paths graph.
    
    Parameters
    ----------
    G : NetworkX graph
        The graph to be tramsformed.
        
    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G.
        
    Notes
    ------
    For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.
    
    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    return floydTransformation(G)
            
 def floydTransformation(G):
    """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.
    
    Parameters
    ----------
    G : NetworkX graph
        The graph to be tramsformed.
        
    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G.
        
    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    spMatrix = nx.floyd_warshall_numpy(G) # @todo weigth label not considered
    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))
    for i in range(0, G.number_of_nodes()):
        for j in range(0, G.number_of_nodes()):
            S.add_edge(i, j, cost = spMatrix[i, j])
    return S
--- a/pygraph/c_ext/Makefile
+++ b/pygraph/c_ext/Makefile
@@ -1,5 +0,0 @@
 # You must specify your env variable LSAPE_DIR
 #LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/

 liblsap.so:lsap.cpp
 	g++ -fPIC -I/home/bgauzere/Téléchargements/lsape/include/ -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR)
--- a/pygraph/c_ext/README.md
+++ b/pygraph/c_ext/README.md
@@ -1,6 +0,0 @@
 Python wrapper for lsape method

 Specify your LSAPE_DIR env variable with the location of the source
 code to compile

 source code : https://bougleux.users.greyc.fr/lsape/
--- a/pygraph/c_ext/init.py
+++ b/pygraph/c_ext/init.py
@@ -1,17 +0,0 @@
 # -*-coding:utf-8 -*-
 """Pygraph - c_ext module

 This package binds some C++ code to python 

 lsape_binders.py : binders to C++ code of LSAPE methods implemented in 
 https://bougleux.users.greyc.fr/lsape/

 """

 # info
 __version__ = "0.1"
 __author__ = "Benoit Gaüzère"
 __date__ = "November 2017"

 # import sub modules
 from pygraph.c_ext import lsape_binders
--- a/pygraph/c_ext/lsap.cpp
+++ b/pygraph/c_ext/lsap.cpp
@@ -1,43 +0,0 @@
 /*
 Python wrapper
 */

 #include "hungarian-lsape.hh"
 #include "hungarian-lsap.hh"

 #include <cstdio>

 extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){
  double * u = new double[nm];
  double * v = new double[nm];

  int * rho_int = new int[nm];
  int * varrho_int = new int[nm];

  hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int);
  //Find a better way to do
  for (int i =0;i<nm;i++){
    rho[i] = (long)(rho_int[i]);
    varrho[i] = (long)(varrho_int[i]);
  }  
  return 0;
 }



 extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){
  double * u = new double[n];
  double * v = new double[m];

  int * rho_int = new int[n];
  int * varrho_int = new int[m];

  hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v);
  for (int i =0;i<n;i++)
    rho[i] = (long)(rho_int[i]);

  for (int i =0;i<m;i++)
    varrho[i] = (long)(varrho_int[i]);
  
  return 0;
 }
--- a/pygraph/c_ext/lsape_binders.py
+++ b/pygraph/c_ext/lsape_binders.py
@@ -1,23 +0,0 @@
 import numpy as np
 import ctypes as c
 from ctypes import cdll
 import os.path

 def lsap_solverHG(C):
    ''' Binding for lsape hungarian solver '''

    nm = C.shape[0]
    dll_name = 'liblsap.so'
    lib = cdll.LoadLibrary(os.path.abspath(
        os.path.join(os.path.dirname(__file__), dll_name)))
    lib.lsap.restype = c.c_int
    rho = np.zeros((nm, 1), int)
    varrho = np.zeros((nm, 1), int)
    C[C == np.inf] = 10000

    lib.lsap(c.c_void_p(C.transpose().ctypes.data),
             c.c_int(nm),
             c.c_void_p(rho.ctypes.data),
             c.c_void_p(varrho.ctypes.data))

    return np.array(range(0, nm)), np.array([c.c_int(i).value for i in varrho])
--- a/pygraph/ged/GED.py
+++ b/pygraph/ged/GED.py
@@ -1,72 +0,0 @@
 from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction
 from pygraph.ged.costfunctions import NeighboorhoodCostFunction
 from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping
 from scipy.optimize import linear_sum_assignment

 def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=ConstantCostFunction(1, 3, 1, 3),
        solver=linear_sum_assignment):
    """Compute Graph Edit Distance between G1 and G2 according to mapping
    encoded within rho and varrho. Graph's node must be indexed by a
    index which is used is rho and varrho 
    NB: Utilisation de
    dictionnaire pour etre plus versatile ?

    """
    if ((rho is None) or (varrho is None)):
        if(method == 'Riesen'):
            cf_bp = RiesenCostFunction(cf,lsap_solver=solver)
        elif(method == 'Neighboorhood'):
            cf_bp = NeighboorhoodCostFunction(cf,lsap_solver=solver)
        elif(method == 'Basic'):
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    ged = 0
    for i in G1.nodes():
        phi_i = rho[i]
        if(phi_i >= m):
            ged += cf.cnd(i, G1)
        else:
            ged += cf.cns(i, phi_i, G1, G2)
    for j in G2.nodes():
        phi_j = varrho[j]
        if(phi_j >= n):
            ged += cf.cni(j, G2)

    for e in G1.edges(data=True):
        i = e[0]
        j = e[1]
        phi_i = rho[i]
        phi_j = rho[j]
        if (phi_i < m) and (phi_j < m):
            mappedEdge = len(list(filter(lambda x: True if
                                         x == phi_j else False, G2[phi_i])))
            if(mappedEdge):
                e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
                min_cost = min(cf.ces(e, e2, G1, G2),
                               cf.ced(e, G1) + cf.cei(e2, G2))
                ged += min_cost
            else:
                ged += cf.ced(e, G1)
        else:
            ged += cf.ced(e, G1)
    for e in G2.edges(data=True):
        i = e[0]
        j = e[1]
        phi_i = varrho[i]
        phi_j = varrho[j]
        if (phi_i < n) and (phi_j < n):
            mappedEdge = len(list(filter(lambda x: True if x == phi_j
                                         else False, G1[phi_i])))
            if(not mappedEdge):
                ged += cf.cei(e, G2)
        else:
            ged += cf.ced(e, G2)
    return ged, rho, varrho
--- a/pygraph/ged/init.py
+++ b/pygraph/ged/init.py
@@ -1,17 +0,0 @@
 # -*-coding:utf-8 -*-
 """Pygraph - ged module

 Implement some methods to compute ged between graphs


 """

 # info
 __version__ = "0.1"
 __author__ = "Benoit Gaüzère"
 __date__ = "November 2017"

 from pygraph.ged import costfunctions
 from pygraph.ged import bipartiteGED
 from pygraph.ged import GED

--- a/pygraph/ged/bipartiteGED.py
+++ b/pygraph/ged/bipartiteGED.py
@@ -1,33 +0,0 @@
 import numpy as np
 from scipy.optimize import linear_sum_assignment
 from pygraph.ged.costfunctions import ConstantCostFunction


 def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
    """Compute a Cost Matrix according to cost function cf"""
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    nm = n + m
    C = np.ones([nm, nm])*np.inf
    C[n:, m:] = 0

    for u in G1.nodes():
        for v in G2.nodes():
            cost = cf.cns(u, v, G1, G2)
            C[u, v] = cost

    for v in G1.nodes():
        C[v, m + v] = cf.cnd(v, G1)

    for v in G2.nodes():
        C[n + v, v] = cf.cni(v, G2)
    return C


 def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
    """Compute an optimal linear mapping according to cost Matrix C
    inclure les progs C de Seb

    """
    row_ind, col_ind = lsap_solver(C)
    return col_ind, row_ind[np.argsort(col_ind)]
--- a/pygraph/ged/costfunctions.py
+++ b/pygraph/ged/costfunctions.py
@@ -1,138 +0,0 @@
 import numpy as np
 from scipy.optimize import linear_sum_assignment


 class ConstantCostFunction:
    """ Define a symmetric constant cost fonction for edit operations """
    def __init__(self, cns, cni, ces, cei):
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    def cns(self, node_u, node_v, g1, g2):
        """ return substitution edit operation cost between node_u of G1 and node_v of G2"""
        return (g1.node[node_u]['label'] != g2.node[node_v]['label'])*self.cns_

    def cnd(self, u, G1):
        return self.cnd_

    def cni(self, v, G2):
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """tester avec des attributs autres que symboliques en testant
        l'operateur __eq__"""
        return (e1[2]['label'] != e2[2]['label'])*self.ces_

    def ced(self, e1, G1):
        return self.ced_

    def cei(self, e2, G2):
        return self.cei_


 class RiesenCostFunction():
    """ Cost function associated to the computation of a cost matrix between nodes for LSAP"""
    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """ u et v sont des id de noeuds """
        n = len(G1[u])
        m = len(G2[v])
        sub_C = np.ones([n+m, n+m]) * np.inf
        sub_C[n:, m:] = 0
        i = 0
        l_nbr_u = G1[u]
        l_nbr_v = G2[v]
        for nbr_u in l_nbr_u:
            j = 0
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for nbr_v in G2[v]:
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
                j += 1
            i += 1

        i = 0
        for nbr_u in l_nbr_u:
            sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
            i += 1

        j = 0
        for nbr_v in l_nbr_v:
            sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
            j += 1
        row_ind, col_ind = self.lsap_solver_(sub_C)
        cost = np.sum(sub_C[row_ind, col_ind])
        return self.cf_.cns(u, v, G1, G2) + cost

    def cnd(self, u, G1):
        cost = 0
        for nbr in G1[u]:
            cost += self.cf_.ced([u,nbr,G1[u][nbr]],G1)
            
        return self.cf_.cnd(u,G1) + cost

    def cni(self, v, G2):
        cost = 0
        for nbr in G2[v]:
            cost += self.cf_.cei([v,nbr,G2[v][nbr]], G2)
            
        return self.cf_.cni(v, G2) + cost


 class NeighboorhoodCostFunction():
    """ Cost function associated to the computation of a cost matrix between nodes for LSAP"""
    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ =  cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """ u et v sont des id de noeuds """
        n = len(G1[u])
        m = len(G2[v])
        sub_C = np.ones([n+m, n+m]) * np.inf
        sub_C[n:, m:] = 0
        i = 0
        l_nbr_u = G1[u]
        l_nbr_v = G2[v]
        for nbr_u in l_nbr_u:
            j = 0
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for nbr_v in G2[v]:
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
                sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2)
                j += 1
            i += 1

        i = 0
        for nbr_u in l_nbr_u:
            sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
            sub_C[i, m+i] += self.cf_.cnd(nbr_u, G1)
            i += 1

        j = 0
        for nbr_v in l_nbr_v:
            sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
            sub_C[n+j, j] += self.cf_.cni(nbr_v, G2)
            j += 1

        row_ind, col_ind = self.lsap_solver_(sub_C)
        cost = np.sum(sub_C[row_ind, col_ind])
        return self.cf_.cns(u, v, G1, G2) + cost

    def cnd(self, u, G1):
        cost = 0
        for nbr in G1[u]:
            cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + cost

    def cni(self, v, G2):
        cost = 0
        for nbr in G2[v]:
            cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + cost
--- a/pygraph/kernels/.gitignore
+++ b/pygraph/kernels/.gitignore
--- a/pygraph/utils/init.py
+++ b/pygraph/utils/init.py
@@ -13,5 +13,5 @@ __version__ = "0.1"
 __author__ = "Benoit Gaüzère"
 __date__ = "November 2017"

 from pygraph.utils import graphfiles
 from pygraph.utils import utils
 from utils import graphfiles
 from utils import utils
--- a/pygraph/utils/utils.py
+++ b/pygraph/utils/utils.py
@@ -5,8 +5,8 @@ import numpy as np
 def getSPLengths(G1):
    sp = nx.shortest_path(G1)
    distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
    for i in sp.keys():
        for j in sp[i].keys():
    for i in np.keys():
        for j in np[i].keys():
            distances[i, j] = len(sp[i][j])-1
    return distances

--- a/tests/README.md
+++ b/tests/README.md
@@ -1,5 +0,0 @@
 To use the library : 
 $> virtualenv --python=/usr/bin/python3.5 venv
 $> pip install -r requirements.txt
 $> source venv/bin/activate
 ... Go use pygraph
--- a/tests/opt.py
+++ b/tests/opt.py
@@ -1,66 +0,0 @@
 import ot
 import sys
 import pathlib
 sys.path.insert(0, "../")

 from pygraph.utils.graphfiles import loadDataset
 from pygraph.ged.costfunctions import ConstantCostFunction
 from pygraph.utils.utils import getSPLengths
 from tqdm import tqdm
 import numpy as np
 from scipy.optimize import linear_sum_assignment
 from pygraph.ged.GED import ged
 import scipy

 def pad(C, n):
    C_pad = np.zeros((n, n))
    C_pad[:C.shape[0], :C.shape[1]] = C
    return C_pad

 if (__name__ == "__main__"):
    ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds"
    dataset, y = loadDataset(ds_filename)
    cf = ConstantCostFunction(1, 3, 1, 3)
    N = len(dataset)

    pairs = list()
    
    ged_distances = list() #np.zeros((N, N))
    gw_distances = list() #np.zeros((N, N))
    for i in tqdm(range(0, N)):
        for j in tqdm(range(i, N)):
            G1 = dataset[i]
            G2 = dataset[j]
            n = G1.number_of_nodes()
            m = G2.number_of_nodes()
            if(n == m):
                C1 = getSPLengths(G1)
                C2 = getSPLengths(G2)

                C1 /= C1.max()
                C2 /= C2.max()

                dim = max(n, m)
                if(n < m):
                    C1 = pad(C1, dim)
                elif (m < n):
                    C2 = pad(C2, dim)

                p = ot.unif(dim)
                q = ot.unif(dim)

                gw = ot.gromov_wasserstein(C1, C2, p, q,
                                           'square_loss', epsilon=5e-3)
                row_ind, col_ind = linear_sum_assignment(-gw)
                rho = col_ind
                varrho = row_ind[np.argsort(col_ind)]
                pairs.append((i,j))
                gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])

                ged_distances.append(ged(G1, G2, cf=cf)[0])

    print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
    print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

    np.save("distances_riesen", ged_distances)
    np.save("distances_gw", gw_distances)
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,16 +0,0 @@
 cycler==0.10.0
 Cython==0.27.3
 decorator==4.1.2
 matplotlib==2.1.0
 networkx==2.0
 numpy==1.13.3
 pkg-resources==0.0.0
 POT==0.4.0
 pyparsing==2.2.0
 python-dateutil==2.6.1
 pytz==2017.3
 scikit-learn==0.19.1
 scipy==1.0.0
 six==1.11.0
 sklearn==0.0
 tqdm==4.19.4