Browse Source

Merge branch 'ljia' of https://git.litislab.fr/bgauzere/py-graph into ljia

v0.1
jajupmochi 7 years ago
parent
commit
1dbe6630c8
19 changed files with 863 additions and 0 deletions
  1. +170
    -0
      notebooks/py-graph_test.ipynb
  2. +21
    -0
      pygraph/__init__.py
  3. +5
    -0
      pygraph/c_ext/Makefile
  4. +6
    -0
      pygraph/c_ext/README.md
  5. +17
    -0
      pygraph/c_ext/__init__.py
  6. +43
    -0
      pygraph/c_ext/lsap.cpp
  7. +23
    -0
      pygraph/c_ext/lsape_binders.py
  8. +72
    -0
      pygraph/ged/GED.py
  9. +17
    -0
      pygraph/ged/__init__.py
  10. +33
    -0
      pygraph/ged/bipartiteGED.py
  11. +138
    -0
      pygraph/ged/costfunctions.py
  12. +0
    -0
      pygraph/kernels/.gitignore
  13. +68
    -0
      pygraph/kernels/spkernel.py
  14. +17
    -0
      pygraph/utils/__init__.py
  15. +87
    -0
      pygraph/utils/graphfiles.py
  16. +59
    -0
      pygraph/utils/utils.py
  17. +5
    -0
      tests/README.md
  18. +66
    -0
      tests/opt.py
  19. +16
    -0
      tests/requirements.txt

+ 170
- 0
notebooks/py-graph_test.ipynb View File

@@ -0,0 +1,170 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import paths\n",
"\n",
"import pygraph\n",
"\n",
"from pygraph.utils.graphfiles import loadDataset\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import networkx as nx\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# We load a ds dataset\n",
"# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n",
"dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n",
"100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n",
"100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n",
"100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n"
]
}
],
"source": [
"#Compute graph edit distances\n",
"\n",
"from tqdm import tqdm\n",
"from pygraph.c_ext.lsape_binders import lsap_solverHG\n",
"from pygraph.ged.costfunctions import ConstantCostFunction\n",
"from pygraph.ged.GED import ged\n",
"import time\n",
"\n",
"cf = ConstantCostFunction(1,3,1,3)\n",
"N=len(dataset)\n",
"\n",
"methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n",
"ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n",
"\n",
"times = list()\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n",
"times.append(time.clock() - start)\n",
"\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n",
"\n",
"times.append(time.clock() - start)\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n",
"times.append(time.clock() - start)\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n",
"times.append(time.clock() - start)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" method \t mean \t mean \t time\n",
" Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n",
" Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n",
" Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n",
" Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n"
]
}
],
"source": [
"print(\" method \\t mean \\t mean \\t time\")\n",
"data = list()\n",
"for i in range(0,len(ged_distances)):\n",
" ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n",
" print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"name": "py-graph_test.ipynb"
},
"nbformat": 4,
"nbformat_minor": 2
}

+ 21
- 0
pygraph/__init__.py View File

@@ -0,0 +1,21 @@
# -*-coding:utf-8 -*-
"""
Pygraph

This project is organised into the following parts :
* c_ext : bindings to C++ code
* ged : computes the graph edit distance between networkX graphs
* kernels : computation of graph kernels, i.e. graph similarity measures compatible with SVM
* notebooks : examples of code using this library
* utils : miscellaneous computations on graphs
"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"
# import sub modules
from pygraph import c_ext
from pygraph import ged
from pygraph import utils

+ 5
- 0
pygraph/c_ext/Makefile View File

@@ -0,0 +1,5 @@
# You must specify your env variable LSAPE_DIR
#LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/

# Build the shared library loaded by lsape_binders.py.
# The LSAPE headers are located through LSAPE_DIR only; the previous
# machine-specific include path was removed so the rule builds on any host.
liblsap.so: lsap.cpp
	g++ -fPIC -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR)

+ 6
- 0
pygraph/c_ext/README.md View File

@@ -0,0 +1,6 @@
Python wrapper for the LSAPE solvers

Before compiling, set the LSAPE_DIR environment variable to the directory
containing the LSAPE headers (it is passed to the compiler as an include path).

Source code : https://bougleux.users.greyc.fr/lsape/

+ 17
- 0
pygraph/c_ext/__init__.py View File

@@ -0,0 +1,17 @@
# -*-coding:utf-8 -*-
"""Pygraph - c_ext module

This package binds some C++ code to python

lsape_binders.py : binders to C++ code of LSAPE methods implemented in
https://bougleux.users.greyc.fr/lsape/

"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

# import sub modules
from pygraph.c_ext import lsape_binders

+ 43
- 0
pygraph/c_ext/lsap.cpp View File

@@ -0,0 +1,43 @@
/*
Python wrapper
*/

#include "hungarian-lsape.hh"
#include "hungarian-lsap.hh"

#include <cstdio>

/*
 * C entry point wrapping hungarianLSAP for a square nm x nm cost matrix.
 *
 * C       : cost matrix buffer, forwarded untouched to hungarianLSAP.
 * nm      : number of rows (and columns) of the matrix.
 * rho     : caller-allocated output of nm longs, filled from the solver's
 *           first int assignment array.
 * varrho  : caller-allocated output of nm longs, filled from the solver's
 *           second int assignment array.
 *
 * Always returns 0.
 */
extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){
    // Scratch buffers required by hungarianLSAP (presumably the dual
    // variables -- confirm against hungarian-lsap.hh).
    double * u = new double[nm];
    double * v = new double[nm];

    // The solver writes int indices while the Python side passes long buffers.
    int * rho_int = new int[nm];
    int * varrho_int = new int[nm];

    hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int);

    // TODO: find a better way than an element-wise widening copy.
    for (int i = 0; i < nm; i++){
        rho[i] = (long)(rho_int[i]);
        varrho[i] = (long)(varrho_int[i]);
    }

    // Release the temporaries; they were previously leaked on every call.
    delete[] u;
    delete[] v;
    delete[] rho_int;
    delete[] varrho_int;
    return 0;
}



/*
 * C entry point wrapping hungarianLSAPE for an n x m cost matrix.
 *
 * C       : cost matrix buffer, forwarded untouched to hungarianLSAPE.
 * n, m    : sizes handed to the solver; n entries are written to rho and
 *           m entries to varrho.
 * rho     : caller-allocated output of n longs.
 * varrho  : caller-allocated output of m longs.
 *
 * The declared return type is a pointer, but the function always returns
 * a null pointer (0); the signature is kept as-is for existing callers.
 */
extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){
    // Scratch buffers required by hungarianLSAPE (presumably the dual
    // variables -- confirm against hungarian-lsape.hh).
    double * u = new double[n];
    double * v = new double[m];

    // The solver writes int indices while the caller passes long buffers.
    int * rho_int = new int[n];
    int * varrho_int = new int[m];

    hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v);

    for (int i = 0; i < n; i++)
        rho[i] = (long)(rho_int[i]);

    for (int i = 0; i < m; i++)
        varrho[i] = (long)(varrho_int[i]);

    // Release the temporaries; they were previously leaked on every call.
    delete[] u;
    delete[] v;
    delete[] rho_int;
    delete[] varrho_int;
    return 0;
}

+ 23
- 0
pygraph/c_ext/lsape_binders.py View File

@@ -0,0 +1,23 @@
import numpy as np
import ctypes as c
from ctypes import cdll
import os.path

def lsap_solverHG(C):
    """Solve a square assignment problem with the compiled ``lsap`` routine.

    Drop-in alternative to ``scipy.optimize.linear_sum_assignment``: the
    result is a ``(row_ind, col_ind)`` pair.

    Parameters
    ----------
    C : numpy.ndarray
        Square cost matrix; only ``C.shape[0]`` is used as its size.
        ``np.inf`` entries are replaced by 10000 in a private copy before
        the matrix is handed to the native code. ``C`` itself is left
        untouched.

    Returns
    -------
    row_ind : numpy.ndarray
        ``0 .. nm-1``.
    col_ind : numpy.ndarray
        The ``varrho`` array filled by the native ``lsap`` function.

    Notes
    -----
    ``liblsap.so`` is loaded from the directory of this file on every call.
    """
    nm = C.shape[0]
    dll_name = 'liblsap.so'
    lib = cdll.LoadLibrary(os.path.abspath(
        os.path.join(os.path.dirname(__file__), dll_name)))
    lib.lsap.restype = c.c_int

    # Private C-contiguous float64 copy. ``ndarray.transpose()`` only returns
    # a view, so the previous code handed over the address of C's own buffer;
    # copying in C order passes the same bytes for a contiguous float64 input
    # while making other layouts/dtypes well defined and keeping the
    # caller's matrix unmodified.
    cost = np.array(C, dtype=np.float64, order='C')
    cost[cost == np.inf] = 10000

    # The native signature takes ``long *``: allocate C longs explicitly
    # ('l' is the C long type code) instead of the platform default int.
    rho = np.zeros(nm, dtype=np.dtype('l'))
    varrho = np.zeros(nm, dtype=np.dtype('l'))

    lib.lsap(c.c_void_p(cost.ctypes.data),
             c.c_int(nm),
             c.c_void_p(rho.ctypes.data),
             c.c_void_p(varrho.ctypes.data))

    return np.arange(nm), varrho.astype(int)

+ 72
- 0
pygraph/ged/GED.py View File

@@ -0,0 +1,72 @@
from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction
from pygraph.ged.costfunctions import NeighboorhoodCostFunction
from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping
from scipy.optimize import linear_sum_assignment

def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=ConstantCostFunction(1, 3, 1, 3),
        solver=linear_sum_assignment):
    """Compute the graph edit distance between G1 and G2 for a node mapping.

    The mapping is encoded by ``rho`` (G1 node -> index in G2) and
    ``varrho`` (G2 node -> index in G1). Node identifiers are used directly
    as indices into ``rho`` and ``varrho``. An index ``>= m`` in ``rho``
    (resp. ``>= n`` in ``varrho``) marks a deleted (resp. inserted) node.
    NB: use dictionaries to be more versatile?

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        Graphs to compare.
    method : str
        Used only when ``rho`` or ``varrho`` is None. 'Riesen' and
        'Neighboorhood' wrap ``cf`` in the corresponding bipartite cost
        function, 'Basic' uses ``cf`` directly to build the cost matrix.
    rho, varrho : sequences of int, optional
        Precomputed mapping. When either is None, both are computed from the
        bipartite cost matrix with ``solver``.
    cf : cost function
        Provides ``cns``, ``cnd``, ``cni``, ``ces``, ``ced`` and ``cei``.
    solver : callable
        Assignment solver returning ``(row_ind, col_ind)``.

    Returns
    -------
    (cost, rho, varrho)
        Edit cost induced by the mapping, and the mapping itself.

    Raises
    ------
    NameError
        If ``method`` is not one of the supported names.
    """
    if rho is None or varrho is None:
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf, lsap_solver=solver)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf, lsap_solver=solver)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    cost = 0

    # Node operations: deletion or substitution for G1, insertion for G2.
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            cost += cf.cnd(i, G1)
        else:
            cost += cf.cns(i, phi_i, G1, G2)
    for j in G2.nodes():
        if varrho[j] >= n:
            cost += cf.cni(j, G2)

    # Edges of G1: substituted when both endpoints are mapped onto adjacent
    # nodes of G2, deleted otherwise.
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if phi_i < m and phi_j < m and phi_j in G2[phi_i]:
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # Substituting must never cost more than delete + insert.
            cost += min(cf.ces(e, e2, G1, G2),
                        cf.ced(e, G1) + cf.cei(e2, G2))
        else:
            cost += cf.ced(e, G1)

    # Edges of G2 without a counterpart in G1 are inserted. Edges whose
    # endpoints both map onto adjacent G1 nodes were already paid for above.
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if phi_i < n and phi_j < n:
            if phi_j not in G1[phi_i]:
                cost += cf.cei(e, G2)
        else:
            # An endpoint is an inserted node: this is an edge insertion
            # (the deletion cost was charged here before).
            cost += cf.cei(e, G2)
    return cost, rho, varrho

+ 17
- 0
pygraph/ged/__init__.py View File

@@ -0,0 +1,17 @@
# -*-coding:utf-8 -*-
"""Pygraph - ged module

Implement some methods to compute ged between graphs


"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

from pygraph.ged import costfunctions
from pygraph.ged import bipartiteGED
from pygraph.ged import GED


+ 33
- 0
pygraph/ged/bipartiteGED.py View File

@@ -0,0 +1,33 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from pygraph.ged.costfunctions import ConstantCostFunction


def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
    """Build the (n+m) x (n+m) bipartite cost matrix for cost function cf.

    With n and m the node counts of G1 and G2, the matrix is laid out as:
    top-left n x m block holds substitution costs, the entries
    ``[v, m + v]`` hold deletion costs of G1 nodes, the entries
    ``[n + v, v]`` hold insertion costs of G2 nodes, the bottom-right block
    is zero and every other entry is ``inf``.

    Node identifiers are used directly as matrix indices.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    size = n + m

    # Everything is forbidden (inf) until a cost is written explicitly.
    C = np.ones([size, size]) * np.inf
    C[n:, m:] = 0

    # Substitution block.
    for node1 in G1.nodes():
        for node2 in G2.nodes():
            C[node1, node2] = cf.cns(node1, node2, G1, G2)

    # Deletion entries.
    for node1 in G1.nodes():
        C[node1, m + node1] = cf.cnd(node1, G1)

    # Insertion entries.
    for node2 in G2.nodes():
        C[n + node2, node2] = cf.cni(node2, G2)

    return C


def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
    """Compute an optimal linear mapping for cost matrix C.

    ``lsap_solver`` must return ``(row_ind, col_ind)``. The result is
    ``(col_ind, row_ind reordered by ascending col_ind)``, i.e. the
    row -> column mapping followed by the column -> row mapping.
    TODO: include Seb's C programs.
    """
    rows, cols = lsap_solver(C)
    by_column = np.argsort(cols)
    return cols, rows[by_column]

+ 138
- 0
pygraph/ged/costfunctions.py View File

@@ -0,0 +1,138 @@
import numpy as np
from scipy.optimize import linear_sum_assignment


class ConstantCostFunction:
    """Symmetric constant cost function for graph edit operations.

    Insertion and deletion share the same cost, for nodes and for edges.

    Parameters
    ----------
    cns : node substitution cost (charged only when labels differ)
    cni : node insertion cost, also used as node deletion cost
    ces : edge substitution cost (charged only when labels differ)
    cei : edge insertion cost, also used as edge deletion cost
    """
    def __init__(self, cns, cni, ces, cei):
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    def cns(self, node_u, node_v, g1, g2):
        """Return the substitution cost between node_u of g1 and node_v of g2.

        The cost is 0 when both 'label' attributes are equal, ``cns``
        otherwise.
        """
        # ``Graph.nodes[...]`` replaces the ``Graph.node`` attribute, which
        # newer NetworkX releases no longer provide.
        return (g1.nodes[node_u]['label'] != g2.nodes[node_v]['label'])*self.cns_

    def cnd(self, u, G1):
        """Return the (constant) cost of deleting node u of G1."""
        return self.cnd_

    def cni(self, v, G2):
        """Return the (constant) cost of inserting node v of G2."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Return the substitution cost between edges e1 and e2.

        Edges are ``(u, v, attributes)`` triples; the cost is 0 when both
        'label' attributes are equal, ``ces`` otherwise.
        TODO: test with attributes other than symbolic ones, relying on
        the ``__eq__`` operator.
        """
        return (e1[2]['label'] != e2[2]['label'])*self.ces_

    def ced(self, e1, G1):
        """Return the (constant) cost of deleting edge e1 of G1."""
        return self.ced_

    def cei(self, e2, G2):
        """Return the (constant) cost of inserting edge e2 of G2."""
        return self.cei_


class RiesenCostFunction():
    """Node cost function used to build a cost matrix between nodes for LSAP.

    Wraps a basic cost function ``cf`` and adds, to every node operation,
    the cost of the edge operations it implies on the incident edges.
    """
    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Return the cost of substituting node u of G1 by node v of G2.

        u and v are node identifiers. The incident edges of both nodes are
        matched through a small assignment problem whose optimal cost is
        added to the wrapped node substitution cost.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # Same layout as the global bipartite matrix, restricted to the
        # incident edges of u and v.
        sub_C = np.ones([n+m, n+m]) * np.inf
        sub_C[n:, m:] = 0

        # Edge substitutions.
        for row, nbr_u in enumerate(nbrs_u):
            edge_u = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(nbrs_v):
                edge_v = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = self.cf_.ces(edge_u, edge_v, G1, G2)

        # Edge deletions.
        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m+row] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)

        # Edge insertions.
        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n+col, col] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = self.lsap_solver_(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Return the cost of deleting u plus all of its incident edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Return the cost of inserting v plus all of its incident edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost


class NeighboorhoodCostFunction():
    """Node cost function used to build a cost matrix between nodes for LSAP.

    Wraps a basic cost function ``cf``. Node substitution additionally
    accounts for the incident edges AND the neighbouring nodes of the two
    nodes being matched.
    """
    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Return the cost of substituting node u of G1 by node v of G2.

        u and v are node identifiers. Each entry of the local assignment
        problem sums an edge operation cost and the matching operation cost
        on the neighbour reached through that edge.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        sub_C = np.ones([n+m, n+m]) * np.inf
        sub_C[n:, m:] = 0

        # Edge substitution + neighbour substitution.
        for row, nbr_u in enumerate(nbrs_u):
            edge_u = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(nbrs_v):
                edge_v = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = self.cf_.ces(edge_u, edge_v, G1, G2)
                sub_C[row, col] += self.cf_.cns(nbr_u, nbr_v, G1, G2)

        # Edge deletion + neighbour deletion.
        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m+row] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
            sub_C[row, m+row] += self.cf_.cnd(nbr_u, G1)

        # Edge insertion + neighbour insertion.
        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n+col, col] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
            sub_C[n+col, col] += self.cf_.cni(nbr_v, G2)

        rows, cols = self.lsap_solver_(sub_C)
        local_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + local_cost

    def cnd(self, u, G1):
        """Return the cost of deleting u plus all of its incident edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Return the cost of inserting v plus all of its incident edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost

+ 0
- 0
pygraph/kernels/.gitignore View File


+ 68
- 0
pygraph/kernels/spkernel.py View File

@@ -0,0 +1,68 @@
import sys
import pathlib
sys.path.insert(0, "../")


import networkx as nx
import numpy as np
import time

from utils.utils import getSPGraph


def _count_matching_sp_edges(S1, S2):
    """Count edge pairs (e1 in S1, e2 in S2) that match for the sp kernel.

    Two edges match when they carry the same non-zero 'cost' and connect
    the same pair of node identifiers (in either orientation).
    """
    count = 0
    for e1 in S1.edges(data = True):
        for e2 in S2.edges(data = True):
            if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                count += 1
    return count


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated. Each graph
        is first transformed with ``getSPGraph``.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated. They are used as
        given (no shortest-path transformation is applied in this branch),
        so their edges must already carry a 'cost' attribute.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2
        graphs. / SP kernel between 2 graphs.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1: # for a list of graphs
        Gn = args[0]
        Kmatrix = np.zeros((len(Gn), len(Gn)))
        Sn = [getSPGraph(G) for G in Gn] # shortest path graphs of Gn

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                matches = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] += matches
                # Mirror into the lower triangle; the diagonal is only
                # counted once.
                if i != j:
                    Kmatrix[j][i] += matches

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
        return Kmatrix
    else: # for only 2 graphs
        G1 = args[0]
        G2 = args[1]

        # The timer was never started in this branch, which made the final
        # print raise before the kernel could be returned.
        start_time = time.time()
        kernel = _count_matching_sp_edges(G1, G2)

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))
        return kernel

+ 17
- 0
pygraph/utils/__init__.py View File

@@ -0,0 +1,17 @@
# -*-coding:utf-8 -*-
"""Pygraph - utils module

Implement some methods to manage graphs
graphfiles.py : load .gxl and .ct files
utils.py : compute some properties on networkX graphs


"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

from pygraph.utils import graphfiles
from pygraph.utils import utils

+ 87
- 0
pygraph/utils/graphfiles.py View File

@@ -0,0 +1,87 @@
import networkx as nx
def loadCT(filename):
    """Load a graph from a .ct file.

    Parameters
    ----------
    filename : str
        Path of the .ct file.

    Returns
    -------
    G : NetworkX graph
        Graph named after the first line of the file. Nodes are numbered
        from 0 in file order and carry a 'label' attribute (4th field of
        the node line). Edges carry an integer 'label' attribute (4th field
        of the edge line); the 1-based node numbers of the file are shifted
        to 0-based identifiers.

    Notes
    ------
    a typical example of data in .ct is like this:
    3 2 <- number of nodes and edges
    0.0000 0.0000 0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element (open question: what are the first 3 numbers? presumably coordinates - to be confirmed)
    0.0000 0.0000 0.0000 C
    0.0000 0.0000 0.0000 O
    1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. (open question: what is the 3rd number?)
    2 3 1 1
    """
    # Close the file deterministically instead of relying on garbage
    # collection.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0])) # set name of the graph

    # Whitespace-agnostic split: tolerates any amount of leading or
    # separating blanks in the counts line.
    counts = content[1].split()
    nb_nodes = int(counts[0]) # number of the nodes
    nb_edges = int(counts[1]) # number of the edges

    for i in range(0, nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    # Edge lines follow directly after the node lines.
    for i in range(0, nb_edges):
        fields = content[i + nb_nodes + 2].split()
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1, label=int(fields[3]))
    return G


def loadGXL(filename):
    """Load a graph from a .gxl file.

    Nodes are renumbered from 0 in document order; the original GXL
    identifier is kept in the 'id' attribute. Node and edge 'label'
    attributes hold the text of the first child of the first 'attr'
    element.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()

    # GXL identifier -> integer node index.
    index_of = {}
    next_index = 0
    for node_elem in root.iter('node'):
        node_label = node_elem.find('attr')[0].text
        gxl_id = node_elem.attrib['id']
        index_of[gxl_id] = next_index
        G.add_node(next_index, id=gxl_id, label=node_label)
        next_index += 1

    for edge_elem in root.iter('edge'):
        edge_label = edge_elem.find('attr')[0].text
        source = index_of[edge_elem.attrib['from']]
        target = index_of[edge_elem.attrib['to']]
        G.add_edge(source, target, label=edge_label)
    return G
def loadDataset(filename):
    """Load the graphs and targets listed in a dataset file.

    Parameters
    ----------
    filename : str
        Path of the dataset file. Graph files are resolved relative to the
        directory of this file.
        * ``.ds``  : one ``<ct file> <target>`` pair per line; the first '#'
          of the file name is dropped, graphs are read with ``loadCT`` and
          targets converted to float.
        * ``.cxl`` : XML file whose 'print' elements give the 'file' (read
          with ``loadGXL``) and 'class' of each graph.

    Returns
    -------
    (data, y) : (list, list)
        Loaded graphs and their targets. Both lists are empty for any other
        file extension.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            # Ignore blank lines (e.g. a trailing one).
            if not line.strip():
                continue
            tmp = line.split(' ')
            # remove the '#'s in file names; ``join`` keeps the path
            # relative when the dataset lives in the current directory
            # (plain concatenation produced '/<name>').
            data.append(loadCT(join(dirname_dataset, tmp[0].replace('#', '', 1))))
            y.append(float(tmp[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET

        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y

+ 59
- 0
pygraph/utils/utils.py View File

@@ -0,0 +1,59 @@
import networkx as nx
import numpy as np


def getSPLengths(G1):
    """Return the matrix of shortest-path lengths (in edges) of G1.

    Node identifiers are used directly as matrix indices. Entries for which
    ``nx.shortest_path`` reports no path keep their initial value 0.
    """
    all_paths = nx.shortest_path(G1)
    n = G1.number_of_nodes()
    distances = np.zeros((n, n))
    for source in all_paths.keys():
        paths_from_source = all_paths[source]
        for target in paths_from_source.keys():
            # A path lists its nodes, so it has one edge less than nodes.
            distances[source, target] = len(paths_from_source[target]) - 1
    return distances

def getSPGraph(G):
    """Transform graph G to its corresponding shortest-paths graph.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G, as built by
        ``floydTransformation``.

    Notes
    ------
    For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    S = floydTransformation(G)
    return S
def floydTransformation(G):
    """Transform graph G to its shortest-paths graph using Floyd-transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        Graph with the nodes (and node data) of G and an edge for every
        index pair ``(i, j)`` with ``0 <= i, j < number of nodes``, whose
        'cost' attribute is the matching entry of the Floyd-Warshall
        distance matrix.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    sp_matrix = nx.floyd_warshall_numpy(G) # @todo weight label not considered
    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))
    node_count = G.number_of_nodes()
    for row in range(node_count):
        for col in range(node_count):
            S.add_edge(row, col, cost = sp_matrix[row, col])
    return S

+ 5
- 0
tests/README.md View File

@@ -0,0 +1,5 @@
To use the library :
$> virtualenv --python=/usr/bin/python3.5 venv
$> source venv/bin/activate
$> pip install -r requirements.txt
... then go use pygraph

+ 66
- 0
tests/opt.py View File

@@ -0,0 +1,66 @@
import ot
import sys
import pathlib
sys.path.insert(0, "../")

from pygraph.utils.graphfiles import loadDataset
from pygraph.ged.costfunctions import ConstantCostFunction
from pygraph.utils.utils import getSPLengths
from tqdm import tqdm
import numpy as np
from scipy.optimize import linear_sum_assignment
from pygraph.ged.GED import ged
import scipy

def pad(C, n):
    """Return an n x n zero matrix with C copied into its top-left corner."""
    rows, cols = C.shape[0], C.shape[1]
    padded = np.zeros((n, n))
    padded[:rows, :cols] = C
    return padded

# Script entry point: over pairs (i, j >= i) of graphs of a dataset, collect
# the edit distance returned by `ged` for a mapping derived from a
# Gromov-Wasserstein coupling and the one returned by `ged` with its default
# mapping, then print both means and save both lists.
# NOTE(review): the nesting of the statements below is not recoverable from
# this flattened view; comments describe single statements only.
if (__name__ == "__main__"):
# NOTE(review): machine-specific absolute path.
ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds"
dataset, y = loadDataset(ds_filename)
cf = ConstantCostFunction(1, 3, 1, 3)
N = len(dataset)

pairs = list()
ged_distances = list() #np.zeros((N, N))
gw_distances = list() #np.zeros((N, N))
for i in tqdm(range(0, N)):
for j in tqdm(range(i, N)):
G1 = dataset[i]
G2 = dataset[j]
n = G1.number_of_nodes()
m = G2.number_of_nodes()
# NOTE(review): if the padding branches further down are nested under this
# equal-size test they can never run - confirm against the indented file.
if(n == m):
C1 = getSPLengths(G1)
C2 = getSPLengths(G2)

# Scale each shortest-path length matrix by its largest entry.
C1 /= C1.max()
C2 /= C2.max()

dim = max(n, m)
if(n < m):
C1 = pad(C1, dim)
elif (m < n):
C2 = pad(C2, dim)

# Uniform weights over the `dim` entries of each side.
p = ot.unif(dim)
q = ot.unif(dim)

gw = ot.gromov_wasserstein(C1, C2, p, q,
'square_loss', epsilon=5e-3)
# linear_sum_assignment minimises, so the coupling is negated before
# extracting an assignment from it.
row_ind, col_ind = linear_sum_assignment(-gw)
rho = col_ind
varrho = row_ind[np.argsort(col_ind)]
pairs.append((i,j))
gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])

# No mapping given: `ged` computes its own.
ged_distances.append(ged(G1, G2, cf=cf)[0])

# Output strings are in French ("Moyenne sur" = "Mean over").
print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

np.save("distances_riesen", ged_distances)
np.save("distances_gw", gw_distances)

+ 16
- 0
tests/requirements.txt View File

@@ -0,0 +1,16 @@
cycler==0.10.0
Cython==0.27.3
decorator==4.1.2
matplotlib==2.1.0
networkx==2.0
numpy==1.13.3
pkg-resources==0.0.0
POT==0.4.0
pyparsing==2.2.0
python-dateutil==2.6.1
pytz==2017.3
scikit-learn==0.19.1
scipy==1.0.0
six==1.11.0
sklearn==0.0
tqdm==4.19.4

Loading…
Cancel
Save