Browse Source

initial

v0.1
jajupmochi 7 years ago
parent
commit
51911e8bc9
22 changed files with 4 additions and 881 deletions
  1. +0
    -21
      py-graph/__init__.py
  2. +0
    -116
      py-graph/kernels/marginalizedKernel.py
  3. +0
    -68
      py-graph/kernels/pathKernel.py
  4. +0
    -68
      py-graph/kernels/spkernel.py
  5. +0
    -17
      py-graph/utils/__init__.py
  6. +0
    -87
      py-graph/utils/graphfiles.py
  7. +0
    -59
      py-graph/utils/utils.py
  8. +0
    -5
      pygraph/c_ext/Makefile
  9. +0
    -6
      pygraph/c_ext/README.md
  10. +0
    -17
      pygraph/c_ext/__init__.py
  11. +0
    -43
      pygraph/c_ext/lsap.cpp
  12. +0
    -23
      pygraph/c_ext/lsape_binders.py
  13. +0
    -72
      pygraph/ged/GED.py
  14. +0
    -17
      pygraph/ged/__init__.py
  15. +0
    -33
      pygraph/ged/bipartiteGED.py
  16. +0
    -138
      pygraph/ged/costfunctions.py
  17. +0
    -0
      pygraph/kernels/.gitignore
  18. +2
    -2
      pygraph/utils/__init__.py
  19. +2
    -2
      pygraph/utils/utils.py
  20. +0
    -5
      tests/README.md
  21. +0
    -66
      tests/opt.py
  22. +0
    -16
      tests/requirements.txt

+ 0
- 21
py-graph/__init__.py View File

@@ -1,21 +0,0 @@
# -*-coding:utf-8 -*-
"""
Pygraph

This package contains 5 sub packages :
* c_ext : binders to C++ code
* ged : allows to compute graph edit distance between networkX graphs
* kernels : computation of graph kernels, ie graph similarity measure compatible with SVM
* notebooks : examples of code using this library
* utils : Diverse computation on graphs
"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"
# import sub modules
from pygraph import c_ext
from pygraph import ged
from pygraph import utils

+ 0
- 116
py-graph/kernels/marginalizedKernel.py View File

@@ -1,116 +0,0 @@
import sys
import pathlib
sys.path.insert(0, "../")

import networkx as nx
import numpy as np
import time

def marginalizedkernel(*args):
    """Calculate marginalized graph kernels between graphs.

    The function is called either as ``marginalizedkernel(Gn, p_quit, itr)``
    (exactly three arguments) to build a kernel matrix, or as
    ``marginalizedkernel(G1, G2, p_quit, itr)`` to compare two graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    p_quit : float
        The termination probability in the random walks generating step.
    itr : integer
        Upper bound of the iteration counter used to calculate R_inf; the
        update loop runs over ``range(1, itr)``.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/float
        Kernel matrix, each element of which is the marginalized kernel
        between 2 graphs. / Marginalized kernel between 2 graphs.

    Notes
    -----
    Node identifiers are used directly as row/column indices of the R_inf
    matrix, and every node and edge is read through its ``'label'`` attribute.

    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    """
    if len(args) == 3:  # for a list of graphs
        Gn = args[0]
        Kmatrix = np.zeros((len(Gn), len(Gn)))

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                # the kernel is symmetric: compute the upper triangle and mirror it
                Kmatrix[i][j] = marginalizedkernel(Gn[i], Gn[j], args[1], args[2])
                Kmatrix[j][i] = Kmatrix[i][j]
        print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
        return Kmatrix

    # for only 2 graphs
    G1 = args[0]
    G2 = args[1]
    p_quit = args[2]  # the termination probability in the random walks generating step
    itr = args[3]  # bound of the iterations used to calculate R_inf

    kernel = 0
    num_nodes_G1 = nx.number_of_nodes(G1)
    num_nodes_G2 = nx.number_of_nodes(G2)
    # the initial probability distribution in the random walks generating
    # step (uniform distribution over |G|)
    p_init_G1 = 1 / num_nodes_G1
    p_init_G2 = 1 / num_nodes_G2

    q = p_quit * p_quit
    r1 = q

    # matrix to save all the R_inf for all pairs of nodes
    R_inf = np.zeros([num_nodes_G1, num_nodes_G2])

    # calculate R_inf with a simple iterative method
    for _ in range(1, itr):
        R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
        R_inf_new.fill(r1)

        # calculate R_inf for each pair of nodes
        for node1 in G1.nodes():
            neighbor_n1 = G1[node1]
            if len(neighbor_n1) == 0:
                # No neighbour means no transition term to add; skipping
                # also avoids dividing by a zero degree.
                continue
            # the transition probability distribution in the random walks
            # generating step (uniform distribution over the vertices
            # adjacent to the current vertex)
            p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
            for node2 in G2.nodes():
                neighbor_n2 = G2[node2]
                if len(neighbor_n2) == 0:
                    continue
                p_trans_n2 = (1 - p_quit) / len(neighbor_n2)

                for neighbor1 in neighbor_n1:
                    for neighbor2 in neighbor_n2:
                        t = p_trans_n1 * p_trans_n2 * \
                            deltaKernel(G1.nodes[neighbor1]['label'] == G2.nodes[neighbor2]['label']) * \
                            deltaKernel(neighbor_n1[neighbor1]['label'] == neighbor_n2[neighbor2]['label'])
                        R_inf_new[node1][node2] += t * R_inf[neighbor1][neighbor2]  # ref [1] equation (8)

        R_inf[:] = R_inf_new

    # add elements of R_inf up and calculate kernel
    for node1, attrs1 in G1.nodes(data=True):
        for node2, attrs2 in G2.nodes(data=True):
            s = p_init_G1 * p_init_G2 * deltaKernel(attrs1['label'] == attrs2['label'])
            kernel += s * R_inf[node1][node2]  # ref [1] equation (6)

    return kernel
def deltaKernel(condition):
    """Delta kernel: map a truth value to 1 or 0.

    Parameters
    ----------
    condition : Boolean
        The condition deciding the value of the kernel.

    Return
    ------
    Kernel : integer
        1 when the condition is truthy, 0 otherwise.

    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    """
    if condition:
        return 1
    return 0

+ 0
- 68
py-graph/kernels/pathKernel.py View File

@@ -1,68 +0,0 @@
import sys
import pathlib
sys.path.insert(0, "../")


import networkx as nx
import numpy as np
import time

from utils.utils import getSPGraph


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Called with one argument (a list of graphs) it returns the kernel matrix;
    otherwise the first two arguments are taken as the two graphs to compare.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2
        graphs. / SP kernel between 2 graphs.

    Notes
    -----
    Input graphs are first transformed to shortest-paths graphs with
    ``getSPGraph``. Two edges match when they carry the same non-zero
    ``'cost'`` and join the same pair of node identifiers.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]
        Kmatrix = np.zeros((len(Gn), len(Gn)))
        Sn = [getSPGraph(G) for G in Gn]  # shortest path graphs of Gn

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                # symmetric kernel: fill both triangles from one count
                matches = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] = matches
                Kmatrix[j][i] = matches

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
        return Kmatrix

    # for only 2 graphs: transform them like the list branch does, and start
    # the timer that the final message reports (it was never set before).
    G1 = getSPGraph(args[0])
    G2 = getSPGraph(args[1])

    start_time = time.time()
    kernel = _count_matching_sp_edges(G1, G2)

    print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))
    return kernel


def _count_matching_sp_edges(S1, S2):
    """Count edge pairs of two shortest-paths graphs with equal non-zero cost and equal endpoints."""
    matches = 0
    for e1 in S1.edges(data=True):
        cost = e1[2]['cost']
        if cost == 0:
            continue
        for e2 in S2.edges(data=True):
            same_ends = ((e1[0] == e2[0] and e1[1] == e2[1])
                         or (e1[0] == e2[1] and e1[1] == e2[0]))
            if cost == e2[2]['cost'] and same_ends:
                matches += 1
    return matches

+ 0
- 68
py-graph/kernels/spkernel.py View File

@@ -1,68 +0,0 @@
import sys
import pathlib
sys.path.insert(0, "../")


import networkx as nx
import numpy as np
import time

from utils.utils import getSPGraph


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Called with one argument (a list of graphs) it returns the kernel matrix;
    otherwise the first two arguments are taken as the two graphs to compare.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2
        graphs. / SP kernel between 2 graphs.

    Notes
    -----
    Input graphs are first transformed to shortest-paths graphs with
    ``getSPGraph``. Two edges match when they carry the same non-zero
    ``'cost'`` and join the same pair of node identifiers.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]
        Kmatrix = np.zeros((len(Gn), len(Gn)))
        Sn = [getSPGraph(G) for G in Gn]  # shortest path graphs of Gn

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                # symmetric kernel: fill both triangles from one count
                matches = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] = matches
                Kmatrix[j][i] = matches

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
        return Kmatrix

    # for only 2 graphs: transform them like the list branch does, and start
    # the timer that the final message reports (it was never set before).
    G1 = getSPGraph(args[0])
    G2 = getSPGraph(args[1])

    start_time = time.time()
    kernel = _count_matching_sp_edges(G1, G2)

    print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))
    return kernel


def _count_matching_sp_edges(S1, S2):
    """Count edge pairs of two shortest-paths graphs with equal non-zero cost and equal endpoints."""
    matches = 0
    for e1 in S1.edges(data=True):
        cost = e1[2]['cost']
        if cost == 0:
            continue
        for e2 in S2.edges(data=True):
            same_ends = ((e1[0] == e2[0] and e1[1] == e2[1])
                         or (e1[0] == e2[1] and e1[1] == e2[0]))
            if cost == e2[2]['cost'] and same_ends:
                matches += 1
    return matches

+ 0
- 17
py-graph/utils/__init__.py View File

@@ -1,17 +0,0 @@
# -*-coding:utf-8 -*-
"""Pygraph - utils module

Implement some methods to manage graphs
graphfiles.py : load .gxl and .ct files
utils.py : compute some properties on networkX graphs


"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

from utils import graphfiles
from utils import utils

+ 0
- 87
py-graph/utils/graphfiles.py View File

@@ -1,87 +0,0 @@
import networkx as nx
def loadCT(filename):
    """Load a graph from a .ct file.

    Parameters
    ----------
    filename : str
        Path of the .ct file.

    Return
    ------
    G : NetworkX graph
        Graph named after the first line of the file. Nodes are numbered from
        0 in file order and carry a ``label`` attribute; edges carry an
        integer ``label`` attribute.

    Notes
    ------
    The first line of the file is used as the graph name. A typical example
    of the remaining data in .ct is like this:
     3 2  <- number of nodes and edges
       0.0000    0.0000    0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element (the meaning of the first 3 numbers is not used here - TODO confirm)
       0.0000    0.0000    0.0000 C
       0.0000    0.0000    0.0000 O
     1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge (1-based), the last number represents the label (the 3rd number is not used here - TODO confirm)
     2 3 1 1
    """
    # Read everything up front so the file handle is closed right away.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))  # set name of the graph

    # split() ignores leading and repeated blanks, so padded count fields
    # parse as well as single-space separated ones.
    counts = content[1].split()
    nb_nodes = int(counts[0])  # number of the nodes
    nb_edges = int(counts[1])  # number of the edges

    for i in range(nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    for i in range(nb_edges):
        fields = content[i + nb_nodes + 2].split()
        # node numbers are 1-based in the file, 0-based in the graph
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1, label=int(fields[3]))
    return G


def loadGXL(filename):
    """Load a graph from a .gxl file.

    Every ``node`` element becomes a graph node numbered in document order;
    its original ``id`` attribute and the text of the first child of its
    ``attr`` element (stored as ``label``) are kept as node attributes. Every
    ``edge`` element becomes an edge between the nodes named by its ``from``
    and ``to`` attributes, labelled the same way.

    Parameters
    ----------
    filename : str
        Path of the .gxl file.

    Return
    ------
    G : NetworkX graph
        The loaded graph.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()
    index_of = {}  # GXL node id -> integer node index
    for position, node in enumerate(root.iter('node')):
        node_label = node.find('attr')[0].text
        gxl_id = node.attrib['id']
        index_of[gxl_id] = position
        G.add_node(position, id=gxl_id, label=node_label)
    for edge in root.iter('edge'):
        edge_label = edge.find('attr')[0].text
        source = index_of[edge.attrib['from']]
        target = index_of[edge.attrib['to']]
        G.add_edge(source, target, label=edge_label)
    return G
def loadDataset(filename):
    """Load the graphs and targets listed in a dataset file.

    Parameters
    ----------
    filename : str
        Path of the dataset file. A ``.ds`` file lists one ``<ct file>
        <target>`` pair per line; a ``.cxl`` file is an XML document whose
        ``print`` elements carry ``file`` and ``class`` attributes. The
        referenced graph files are resolved relative to the directory of
        ``filename``.

    Return
    ------
    data : list of NetworkX graph
        The loaded graphs.
    y : list
        The targets: floats for ``.ds`` files, the raw ``class`` strings for
        ``.cxl`` files. Both lists are empty for any other extension.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            if not line.strip():
                continue  # tolerate blank lines
            tmp = line.split(' ')
            # remove the first '#' in the file name; join() keeps the path
            # relative when the dataset file has no directory part
            data.append(loadCT(join(dirname_dataset, tmp[0].replace('#', '', 1))))
            y.append(float(tmp[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET

        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y

+ 0
- 59
py-graph/utils/utils.py View File

@@ -1,59 +0,0 @@
import networkx as nx
import numpy as np


def getSPLengths(G1):
    """Return the matrix of shortest-path lengths (in edges) between nodes of G1.

    Parameters
    ----------
    G1 : NetworkX graph
        Graph whose node identifiers are used directly as matrix indices.

    Return
    ------
    distances : Numpy matrix
        ``distances[i, j]`` is the number of edges on a shortest path from i
        to j. Entries for pairs without a reported path keep their initial
        value 0.
    """
    # dict() accepts both a mapping and an iterable of (source, paths) pairs
    sp = dict(nx.shortest_path(G1))
    distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
    # iterate the shortest-path mapping itself (not the numpy module)
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            distances[i, j] = len(path) - 1  # a path of k nodes has k - 1 edges
    return distances

def getSPGraph(G):
    """Build the shortest-paths graph of G.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G.

    Notes
    ------
    For an input graph G, its corresponding shortest-paths graph S contains
    the same set of nodes as G, while there exists an edge between all nodes
    in S which are connected by a walk in G. Every edge in S between two nodes
    is labeled by the shortest distance between these two nodes. The work is
    delegated to the Floyd transformation.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    S = floydTransformation(G)
    return S
def floydTransformation(G):
    """Build the shortest-paths graph of G with the Floyd transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        Graph with the nodes (and node data) of G and one edge per index pair
        (i, j), including i == j, whose ``cost`` attribute is the
        Floyd-Warshall distance matrix entry ``[i, j]``.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    dist = nx.floyd_warshall_numpy(G)  # @todo weight label not considered
    sp_graph = nx.Graph()
    sp_graph.add_nodes_from(G.nodes(data=True))
    size = G.number_of_nodes()
    for row in range(size):
        for col in range(size):
            sp_graph.add_edge(row, col, cost=dist[row, col])
    return sp_graph

+ 0
- 5
pygraph/c_ext/Makefile View File

@@ -1,5 +0,0 @@
# You must specify your env variable LSAPE_DIR: it is the only include path
# used to locate the LSAPE headers, for example
#LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/

liblsap.so:lsap.cpp
	g++ -fPIC -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR)

+ 0
- 6
pygraph/c_ext/README.md View File

@@ -1,6 +0,0 @@
Python wrapper for lsape method

Specify your LSAPE_DIR env variable with the location of the source
code to compile

source code : https://bougleux.users.greyc.fr/lsape/

+ 0
- 17
pygraph/c_ext/__init__.py View File

@@ -1,17 +0,0 @@
# -*-coding:utf-8 -*-
"""Pygraph - c_ext module

This package binds some C++ code to python

lsape_binders.py : binders to C++ code of LSAPE methods implemented in
https://bougleux.users.greyc.fr/lsape/

"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

# import sub modules
from pygraph.c_ext import lsape_binders

+ 0
- 43
pygraph/c_ext/lsap.cpp View File

@@ -1,43 +0,0 @@
/*
Python wrapper
*/

#include "hungarian-lsape.hh"
#include "hungarian-lsap.hh"

#include <cstdio>

// C entry point: solve the square (nm x nm) assignment problem on cost
// matrix C with hungarianLSAP and write the two resulting assignment arrays
// into the caller-provided buffers rho and varrho (nm entries each).
// Always returns 0.
extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){
  // Work buffers handed to the solver (u and v are presumably its dual
  // variables - TODO confirm against the LSAPE headers).
  double * u = new double[nm];
  double * v = new double[nm];

  int * rho_int = new int[nm];
  int * varrho_int = new int[nm];

  hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int);
  // The solver fills int arrays while the caller passes long buffers,
  // hence the element-wise widening copy. TODO: find a better way to do it.
  for (int i =0;i<nm;i++){
    rho[i] = (long)(rho_int[i]);
    varrho[i] = (long)(varrho_int[i]);
  }

  // Release the temporaries; they were leaked on every call before.
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}



// C entry point: solve the n x m assignment problem with edition on cost
// matrix C with hungarianLSAPE and write the results into the
// caller-provided buffers rho (n entries) and varrho (m entries).
// The declared return type is a pointer; the function always returns a
// null pointer (0), kept as is so the exported signature does not change.
extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){
  // Work buffers handed to the solver (u and v are presumably its dual
  // variables - TODO confirm against the LSAPE headers).
  double * u = new double[n];
  double * v = new double[m];

  int * rho_int = new int[n];
  int * varrho_int = new int[m];

  hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v);
  // Widen the solver's int results into the caller's long buffers.
  for (int i =0;i<n;i++)
    rho[i] = (long)(rho_int[i]);

  for (int i =0;i<m;i++)
    varrho[i] = (long)(varrho_int[i]);

  // Release the temporaries; they were leaked on every call before.
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}

+ 0
- 23
pygraph/c_ext/lsape_binders.py View File

@@ -1,23 +0,0 @@
import numpy as np
import ctypes as c
from ctypes import cdll
import os.path

def lsap_solverHG(C):
    ''' Binding for lsape hungarian solver

    Loads ``liblsap.so`` from the directory of this module and calls its
    ``lsap`` function on the cost matrix.

    C : numpy array whose first dimension gives the problem size nm.
        NOTE: infinite entries of C are overwritten IN PLACE with 10000
        before the call, so the caller's array is modified.
        NOTE(review): presumably C must be a square float64 array, since its
        raw data pointer is handed to C++ code - confirm against lsap.cpp.

    Returns a pair of numpy arrays: the indices 0..nm-1 and the values read
    back from the ``varrho`` buffer filled by the library.
    '''

    nm = C.shape[0]
    dll_name = 'liblsap.so'
    # the shared library is expected next to this Python file
    lib = cdll.LoadLibrary(os.path.abspath(
        os.path.join(os.path.dirname(__file__), dll_name)))
    lib.lsap.restype = c.c_int
    # output buffers written by the library
    rho = np.zeros((nm, 1), int)
    varrho = np.zeros((nm, 1), int)
    # replace infinite costs by a large finite value (mutates C)
    C[C == np.inf] = 10000

    # raw data pointers are passed; the library writes into rho and varrho
    lib.lsap(c.c_void_p(C.transpose().ctypes.data),
             c.c_int(nm),
             c.c_void_p(rho.ctypes.data),
             c.c_void_p(varrho.ctypes.data))

    return np.array(range(0, nm)), np.array([c.c_int(i).value for i in varrho])

+ 0
- 72
pygraph/ged/GED.py View File

@@ -1,72 +0,0 @@
from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction
from pygraph.ged.costfunctions import NeighboorhoodCostFunction
from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping
from scipy.optimize import linear_sum_assignment

def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=ConstantCostFunction(1, 3, 1, 3),
        solver=linear_sum_assignment):
    """Compute Graph Edit Distance between G1 and G2 according to mapping
    encoded within rho and varrho.

    Graph nodes must be identified by the indices used in rho and varrho.
    ``rho[i]`` is the image of node i of G1; a value >= m (the number of
    nodes of G2) is charged as a deletion of i. ``varrho[j]`` is the image of
    node j of G2; a value >= n (the number of nodes of G1) is charged as an
    insertion of j.

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        The graphs to compare.
    method : str
        Used only when rho or varrho is None, to pick the cost function of
        the bipartite cost matrix: 'Riesen', 'Neighboorhood' or 'Basic'
        (``cf`` itself).
    rho, varrho : sequences of int, optional
        The mapping; computed with ``getOptimalMapping`` when either is None.
    cf : cost function object
        Provides cns/cnd/cni/ces/ced/cei used to price the edit operations.
    solver : callable
        Assignment solver handed to the cost function and mapping helpers.

    Return
    ------
    (cost, rho, varrho) : the edit cost of the mapping and the mapping used.

    Raises
    ------
    NameError
        If a mapping must be computed and ``method`` is not a known name.

    NB: use dictionaries to be more versatile?
    """
    if (rho is None) or (varrho is None):
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf, lsap_solver=solver)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf, lsap_solver=solver)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    cost = 0

    # node operations: deletion or substitution for G1, insertion for G2
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            cost += cf.cnd(i, G1)
        else:
            cost += cf.cns(i, phi_i, G1, G2)
    for j in G2.nodes():
        if varrho[j] >= n:
            cost += cf.cni(j, G2)

    # edges of G1: substituted when both ends are mapped onto an existing
    # edge of G2 (or deleted and re-inserted if cheaper), deleted otherwise
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if (phi_i < m) and (phi_j < m) and (phi_j in G2[phi_i]):
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            cost += min(cf.ces(e, e2, G1, G2),
                        cf.ced(e, G1) + cf.cei(e2, G2))
        else:
            cost += cf.ced(e, G1)

    # edges of G2 without a counterpart in G1 are insertions; mapped edges
    # were already priced in the loop above. The unmapped-endpoint case used
    # the deletion cost, which is only equal for symmetric cost functions.
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        has_counterpart = (phi_i < n) and (phi_j < n) and (phi_j in G1[phi_i])
        if not has_counterpart:
            cost += cf.cei(e, G2)
    return cost, rho, varrho

+ 0
- 17
pygraph/ged/__init__.py View File

@@ -1,17 +0,0 @@
# -*-coding:utf-8 -*-
"""Pygraph - ged module

Implement some methods to compute ged between graphs


"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

from pygraph.ged import costfunctions
from pygraph.ged import bipartiteGED
from pygraph.ged import GED


+ 0
- 33
pygraph/ged/bipartiteGED.py View File

@@ -1,33 +0,0 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from pygraph.ged.costfunctions import ConstantCostFunction


def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
    """Build the (n+m) x (n+m) bipartite cost matrix of G1 and G2 under cf.

    With n and m the node counts of G1 and G2, the matrix holds:
    substitution costs ``cf.cns`` in the top-left n x m block, deletion costs
    ``cf.cnd`` at ``[v, m + v]``, insertion costs ``cf.cni`` at ``[n + v, v]``,
    zeros in the bottom-right block and infinity everywhere else. Node
    identifiers are used directly as indices.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    size = n + m
    C = np.full([size, size], np.inf)
    C[n:, m:] = 0

    # substitutions
    for u in G1.nodes():
        for v in G2.nodes():
            C[u, v] = cf.cns(u, v, G1, G2)

    # deletions of G1 nodes
    for u in G1.nodes():
        C[u, m + u] = cf.cnd(u, G1)

    # insertions of G2 nodes
    for v in G2.nodes():
        C[n + v, v] = cf.cni(v, G2)
    return C


def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
    """Compute an optimal linear mapping according to cost matrix C.

    Returns the column assigned to each row as reported by the solver, and
    the row indices reordered by ascending assigned column.

    TODO: include Seb's C programs.
    """
    rows, cols = lsap_solver(C)
    order = np.argsort(cols)
    return cols, rows[order]

+ 0
- 138
pygraph/ged/costfunctions.py View File

@@ -1,138 +0,0 @@
import numpy as np
from scipy.optimize import linear_sum_assignment


class ConstantCostFunction:
    """Define a symmetric constant cost function for edit operations.

    Node insertion and deletion share the cost ``cni``; edge insertion and
    deletion share the cost ``cei``. Substitutions cost ``cns`` (nodes) or
    ``ces`` (edges) when the ``'label'`` attributes differ and 0 otherwise.
    """

    def __init__(self, cns, cni, ces, cei):
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    def cns(self, node_u, node_v, g1, g2):
        """Return substitution edit operation cost between node_u of G1 and node_v of G2."""
        # ``nodes[...]`` replaces the ``node[...]`` alias dropped by newer
        # networkx releases; both give the node attribute dict.
        labels_differ = g1.nodes[node_u]['label'] != g2.nodes[node_v]['label']
        return labels_differ * self.cns_

    def cnd(self, u, G1):
        """Return the constant cost of deleting node u of G1."""
        return self.cnd_

    def cni(self, v, G2):
        """Return the constant cost of inserting node v of G2."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Return substitution cost between edges e1 and e2, given as
        (u, v, attributes) triples.

        TODO: test with attributes other than symbolic ones by testing the
        __eq__ operator.
        """
        return (e1[2]['label'] != e2[2]['label']) * self.ces_

    def ced(self, e1, G1):
        """Return the constant cost of deleting edge e1 of G1."""
        return self.ced_

    def cei(self, e2, G2):
        """Return the constant cost of inserting edge e2 of G2."""
        return self.cei_


class RiesenCostFunction():
    """Cost function associated to the computation of a cost matrix between nodes for LSAP.

    Wraps a base cost function ``cf`` and adds to each node operation the
    cost of the operations on the edges incident to the node(s).
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Substitution cost of u by v; u and v are node ids.

        Adds to the base substitution cost the cost of the assignment found
        by the solver between the edges incident to u and those incident to v.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)
        sub_C = np.full([n + m, n + m], np.inf)
        sub_C[n:, m:] = 0

        # edge substitutions
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for j, nbr_v in enumerate(G2[v]):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)

        # edge deletions
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m + i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)

        # edge insertions
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n + j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = self.lsap_solver_(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Deletion cost of u plus the deletion cost of its incident edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost = edges_cost + self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of v plus the insertion cost of its incident edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost = edges_cost + self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost


class NeighboorhoodCostFunction():
    """Cost function associated to the computation of a cost matrix between nodes for LSAP.

    Wraps a base cost function ``cf``; node substitution additionally prices
    the incident edges together with the neighbouring nodes they lead to.
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Substitution cost of u by v; u and v are node ids.

        Adds to the base substitution cost the cost of the assignment found
        by the solver between the (edge, neighbour) pairs around u and those
        around v.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)
        sub_C = np.full([n + m, n + m], np.inf)
        sub_C[n:, m:] = 0

        # edge + neighbour substitutions
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for j, nbr_v in enumerate(G2[v]):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
                sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2)

        # edge + neighbour deletions
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m + i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
            sub_C[i, m + i] += self.cf_.cnd(nbr_u, G1)

        # edge + neighbour insertions
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n + j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
            sub_C[n + j, j] += self.cf_.cni(nbr_v, G2)

        rows, cols = self.lsap_solver_(sub_C)
        neighbourhood_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + neighbourhood_cost

    def cnd(self, u, G1):
        """Deletion cost of u plus the deletion cost of its incident edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost = edges_cost + self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of v plus the insertion cost of its incident edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost = edges_cost + self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost

+ 0
- 0
pygraph/kernels/.gitignore View File


+ 2
- 2
pygraph/utils/__init__.py View File

@@ -13,5 +13,5 @@ __version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

from pygraph.utils import graphfiles
from pygraph.utils import utils
from utils import graphfiles
from utils import utils

+ 2
- 2
pygraph/utils/utils.py View File

@@ -5,8 +5,8 @@ import numpy as np
def getSPLengths(G1):
sp = nx.shortest_path(G1)
distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
for i in sp.keys():
for j in sp[i].keys():
for i in np.keys():
for j in np[i].keys():
distances[i, j] = len(sp[i][j])-1
return distances



+ 0
- 5
tests/README.md View File

@@ -1,5 +0,0 @@
To use the library :
$> virtualenv --python=/usr/bin/python3.5 venv
$> source venv/bin/activate
$> pip install -r requirements.txt
... Go use pygraph

+ 0
- 66
tests/opt.py View File

@@ -1,66 +0,0 @@
import ot
import sys
import pathlib
sys.path.insert(0, "../")

from pygraph.utils.graphfiles import loadDataset
from pygraph.ged.costfunctions import ConstantCostFunction
from pygraph.utils.utils import getSPLengths
from tqdm import tqdm
import numpy as np
from scipy.optimize import linear_sum_assignment
from pygraph.ged.GED import ged
import scipy

def pad(C, n):
    """Return an n x n zero matrix whose top-left corner holds a copy of C."""
    rows, cols = C.shape[0], C.shape[1]
    padded = np.zeros((n, n))
    padded[:rows, :cols] = C
    return padded

# Experiment script: for every pair of graphs of the dataset, compare the edit
# cost of the mapping derived from a Gromov-Wasserstein coupling with the edit
# cost of the default bipartite mapping, then print the means and save both
# lists of distances.
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation - confirm against the original file, in particular
# whether ged_distances is meant to be filled for every pair or only for
# pairs of equal size.
if (__name__ == "__main__"):
    # developer-specific dataset location
    ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds"
    dataset, y = loadDataset(ds_filename)
    cf = ConstantCostFunction(1, 3, 1, 3)
    N = len(dataset)

    pairs = list()
    ged_distances = list() #np.zeros((N, N))
    gw_distances = list() #np.zeros((N, N))
    for i in tqdm(range(0, N)):
        for j in tqdm(range(i, N)):
            G1 = dataset[i]
            G2 = dataset[j]
            n = G1.number_of_nodes()
            m = G2.number_of_nodes()
            # only graphs with the same number of nodes are compared
            if(n == m):
                C1 = getSPLengths(G1)
                C2 = getSPLengths(G2)

                # normalise the distance matrices by their largest entry
                C1 /= C1.max()
                C2 /= C2.max()

                dim = max(n, m)
                # pad the smaller matrix up to the common size (neither
                # branch can run while this code sits under n == m)
                if(n < m):
                    C1 = pad(C1, dim)
                elif (m < n):
                    C2 = pad(C2, dim)

                # uniform weights over the nodes
                p = ot.unif(dim)
                q = ot.unif(dim)

                gw = ot.gromov_wasserstein(C1, C2, p, q,
                                           'square_loss', epsilon=5e-3)
                # turn the coupling into a node mapping: negating it makes
                # the assignment solver maximise the coupling mass
                row_ind, col_ind = linear_sum_assignment(-gw)
                rho = col_ind
                varrho = row_ind[np.argsort(col_ind)]
                pairs.append((i,j))
                gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])

                # reference value: mapping computed by ged itself
                ged_distances.append(ged(G1, G2, cf=cf)[0])

    print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
    print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

    np.save("distances_riesen", ged_distances)
    np.save("distances_gw", gw_distances)

+ 0
- 16
tests/requirements.txt View File

@@ -1,16 +0,0 @@
cycler==0.10.0
Cython==0.27.3
decorator==4.1.2
matplotlib==2.1.0
networkx==2.0
numpy==1.13.3
pkg-resources==0.0.0
POT==0.4.0
pyparsing==2.2.0
python-dateutil==2.6.1
pytz==2017.3
scikit-learn==0.19.1
scipy==1.0.0
six==1.11.0
sklearn==0.0
tqdm==4.19.4

Loading…
Cancel
Save