Browse Source

Merge branch 'master' of https://git.litislab.fr/bgauzere/py-graph

add pygraph/kernels/spkernel.py
modify pygraph/utils/util.py and pygraph/utils/graphfiles.py
v0.1
jajupmochi 7 years ago
parent
commit
d8a96ce408
21 changed files with 789 additions and 75 deletions
  1. +175
    -0
      notebooks/.ipynb_checkpoints/test_lib-checkpoint.ipynb
  2. +170
    -0
      notebooks/py-graph_test.ipynb
  3. +21
    -0
      pygraph/__init__.py
  4. +5
    -0
      pygraph/c_ext/Makefile
  5. +6
    -0
      pygraph/c_ext/README.md
  6. +17
    -0
      pygraph/c_ext/__init__.py
  7. +43
    -0
      pygraph/c_ext/lsap.cpp
  8. +23
    -0
      pygraph/c_ext/lsape_binders.py
  9. +15
    -17
      pygraph/ged/GED.py
  10. +17
    -0
      pygraph/ged/__init__.py
  11. +8
    -8
      pygraph/ged/bipartiteGED.py
  12. +36
    -31
      pygraph/ged/costfunctions.py
  13. +0
    -0
      pygraph/kernels/.gitignore
  14. +68
    -0
      pygraph/kernels/spkernel.py
  15. +17
    -0
      pygraph/utils/__init__.py
  16. +22
    -9
      pygraph/utils/graphfiles.py
  17. +59
    -0
      pygraph/utils/utils.py
  18. +5
    -0
      tests/README.md
  19. +66
    -0
      tests/opt.py
  20. +16
    -0
      tests/requirements.txt
  21. +0
    -10
      utils/utils.py

+ 175
- 0
notebooks/.ipynb_checkpoints/test_lib-checkpoint.ipynb
File diff suppressed because it is too large
View File


+ 170
- 0
notebooks/py-graph_test.ipynb View File

@@ -0,0 +1,170 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import paths\n",
"\n",
"import pygraph\n",
"\n",
"from pygraph.utils.graphfiles import loadDataset\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import networkx as nx\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# We load a ds dataset\n",
"# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n",
"dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n",
"100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n",
"100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n",
"100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n"
]
}
],
"source": [
"#Compute graph edit distances\n",
"\n",
"from tqdm import tqdm\n",
"from pygraph.c_ext.lsape_binders import lsap_solverHG\n",
"from pygraph.ged.costfunctions import ConstantCostFunction\n",
"from pygraph.ged.GED import ged\n",
"import time\n",
"\n",
"cf = ConstantCostFunction(1,3,1,3)\n",
"N=len(dataset)\n",
"\n",
"methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n",
"ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n",
"\n",
"times = list()\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n",
"times.append(time.clock() - start)\n",
"\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n",
"\n",
"times.append(time.clock() - start)\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n",
"times.append(time.clock() - start)\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n",
"times.append(time.clock() - start)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" method \t mean \t mean \t time\n",
" Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n",
" Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n",
" Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n",
" Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n"
]
}
],
"source": [
"print(\" method \\t mean \\t mean \\t time\")\n",
"data = list()\n",
"for i in range(0,len(ged_distances)):\n",
" ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n",
" print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"name": "py-graph_test.ipynb"
},
"nbformat": 4,
"nbformat_minor": 2
}

+ 21
- 0
pygraph/__init__.py View File

@@ -0,0 +1,21 @@
# -*-coding:utf-8 -*-
"""
Pygraph

This package contains the following sub packages :
* c_ext : binders to C++ code
* ged : allows to compute graph edit distance between networkX graphs
* kernels : computation of graph kernels, ie graph similarity measure compatible with SVM
* notebooks : examples of code using this library
* utils : Diverse computation on graphs
"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"
# import sub modules
from pygraph import c_ext
from pygraph import ged
from pygraph import utils

+ 5
- 0
pygraph/c_ext/Makefile View File

@@ -0,0 +1,5 @@
# You must specify your env variable LSAPE_DIR
#LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/

# Build the shared library that lsape_binders.py loads at runtime.
# NOTE(review): the first -I flag is a machine-specific absolute path; the build
# presumably only needs -I$(LSAPE_DIR) -- confirm and drop the hard-coded path.
liblsap.so:lsap.cpp
	g++ -fPIC -I/home/bgauzere/Téléchargements/lsape/include/ -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR)

+ 6
- 0
pygraph/c_ext/README.md View File

@@ -0,0 +1,6 @@
Python wrapper for lsape method

Set the LSAPE_DIR environment variable to the location of the LSAPE
source code, then run `make` in this directory to build liblsap.so.

source code : https://bougleux.users.greyc.fr/lsape/

+ 17
- 0
pygraph/c_ext/__init__.py View File

@@ -0,0 +1,17 @@
# -*-coding:utf-8 -*-
"""Pygraph - c_ext module

This package binds some C++ code to python

lsape_binders.py : binders to C++ code of LSAPE methods implemented in
https://bougleux.users.greyc.fr/lsape/

"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

# import sub modules
from pygraph.c_ext import lsape_binders

+ 43
- 0
pygraph/c_ext/lsap.cpp View File

@@ -0,0 +1,43 @@
/*
Python wrapper
*/

#include "hungarian-lsape.hh"
#include "hungarian-lsap.hh"

#include <cstdio>

/*
 * C-linkage entry point used by the Python binder.
 *
 * Runs hungarianLSAP on the nm x nm cost matrix C and copies the two index
 * arrays it fills into the caller-provided buffers rho and varrho, which
 * must each hold at least nm longs.
 *
 * Always returns 0.
 */
extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){
  // Scratch buffers handed to the solver.
  double * u = new double[nm];
  double * v = new double[nm];

  int * rho_int = new int[nm];
  int * varrho_int = new int[nm];

  hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int);

  // The solver writes int indices while the caller supplies long buffers,
  // so the results are widened element by element.
  // TODO: find a better way to do this.
  for (int i = 0; i < nm; i++){
    rho[i] = (long)(rho_int[i]);
    varrho[i] = (long)(varrho_int[i]);
  }

  // Release the scratch buffers; they were previously leaked on every call.
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}



/*
 * C-linkage entry point for the LSAPE solver.
 *
 * Runs hungarianLSAPE on the n x m cost matrix C and copies the two index
 * arrays it fills into the caller-provided buffers: rho (at least n longs)
 * and varrho (at least m longs).
 *
 * The function is declared as returning int* but always returns a null
 * pointer; the declaration is kept unchanged so existing callers still link.
 */
extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){
  // Scratch buffers handed to the solver.
  double * u = new double[n];
  double * v = new double[m];

  int * rho_int = new int[n];
  int * varrho_int = new int[m];

  hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v);

  // Widen the int results into the caller's long buffers.
  for (int i = 0; i < n; i++)
    rho[i] = (long)(rho_int[i]);

  for (int i = 0; i < m; i++)
    varrho[i] = (long)(varrho_int[i]);

  // Release the scratch buffers; they were previously leaked on every call.
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}

+ 23
- 0
pygraph/c_ext/lsape_binders.py View File

@@ -0,0 +1,23 @@
import numpy as np
import ctypes as c
from ctypes import cdll
import os.path

def lsap_solverHG(C):
    """Solve a square assignment problem with the compiled ``lsap`` routine.

    Drop-in replacement for ``scipy.optimize.linear_sum_assignment`` wherever
    a ``solver`` / ``lsap_solver`` callback is accepted.

    Parameters
    ----------
    C : array-like, square
        Cost matrix. Infinite entries are replaced by a large finite cost
        (10000) before the solver is called. The caller's matrix is left
        untouched: the substitution happens on a private copy.

    Return
    ------
    row_ind, col_ind : Numpy arrays
        ``row_ind`` is ``0..nm-1`` and ``col_ind`` holds the ``varrho`` array
        filled by the native routine.
    """
    # Private float64, C-contiguous copy: the native side reads the buffer as
    # ``double*``, and callers reuse their own matrix after the call, so it
    # must be neither reinterpreted nor modified in place.
    costs = np.array(C, dtype=np.float64, order='C')
    nm = costs.shape[0]
    costs[costs == np.inf] = 10000

    dll_name = 'liblsap.so'
    lib = cdll.LoadLibrary(os.path.abspath(
        os.path.join(os.path.dirname(__file__), dll_name)))
    lib.lsap.restype = c.c_int

    # The native signature takes ``long*`` outputs; size the buffers with the
    # platform's C long rather than NumPy's default integer.
    rho = np.zeros(nm, dtype=c.c_long)
    varrho = np.zeros(nm, dtype=c.c_long)

    # NOTE(review): transpose() only returns a view, so this pointer addresses
    # the same row-major buffer as ``costs``; confirm the memory layout the
    # native solver expects.
    lib.lsap(c.c_void_p(costs.transpose().ctypes.data),
             c.c_int(nm),
             c.c_void_p(rho.ctypes.data),
             c.c_void_p(varrho.ctypes.data))

    return np.arange(nm), varrho.astype(int)

ged/GED.py → pygraph/ged/GED.py View File

@@ -1,10 +1,11 @@
from ged.costfunctions import BasicCostFunction, RiesenCostFunction
from ged.costfunctions import NeighboorhoodCostFunction
from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping
from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction
from pygraph.ged.costfunctions import NeighboorhoodCostFunction
from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping
from scipy.optimize import linear_sum_assignment

def ged(G1, G2, method='Riesen', rho=None, varrho=None,
cf=BasicCostFunction(1, 3, 1, 3)):
cf=ConstantCostFunction(1, 3, 1, 3),
solver=linear_sum_assignment):
"""Compute Graph Edit Distance between G1 and G2 according to mapping
encoded within rho and varrho. Graph's node must be indexed by a
index which is used is rho and varrho
@@ -14,31 +15,32 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None,
"""
if ((rho is None) or (varrho is None)):
if(method == 'Riesen'):
cf_bp = RiesenCostFunction(cf)
cf_bp = RiesenCostFunction(cf,lsap_solver=solver)
elif(method == 'Neighboorhood'):
cf_bp = NeighboorhoodCostFunction(cf)
cf_bp = NeighboorhoodCostFunction(cf,lsap_solver=solver)
elif(method == 'Basic'):
cf_bp = cf
else:
raise NameError('Non existent method ')

rho, varrho = getOptimalMapping(computeBipartiteCostMatrix(G1, G2, cf_bp))
rho, varrho = getOptimalMapping(
computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)

n = G1.number_of_nodes()
m = G2.number_of_nodes()
ged = 0
for i in G1.nodes_iter():
for i in G1.nodes():
phi_i = rho[i]
if(phi_i >= m):
ged += cf.cnd(i, G1)
else:
ged += cf.cns(i, phi_i, G1, G2)
for j in G2.nodes_iter():
for j in G2.nodes():
phi_j = varrho[j]
if(phi_j >= n):
ged += cf.cni(j, G2)

for e in G1.edges_iter(data=True):
for e in G1.edges(data=True):
i = e[0]
j = e[1]
phi_i = rho[i]
@@ -49,13 +51,13 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None,
if(mappedEdge):
e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
min_cost = min(cf.ces(e, e2, G1, G2),
cf.ced(e, G1), cf.cei(e2, G2))
cf.ced(e, G1) + cf.cei(e2, G2))
ged += min_cost
else:
ged += cf.ced(e, G1)
else:
ged += cf.ced(e, G1)
for e in G2.edges_iter(data=True):
for e in G2.edges(data=True):
i = e[0]
j = e[1]
phi_i = varrho[i]
@@ -68,7 +70,3 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None,
else:
ged += cf.ced(e, G2)
return ged, rho, varrho


def computeDistanceMatrix(dataset):
pass

+ 17
- 0
pygraph/ged/__init__.py View File

@@ -0,0 +1,17 @@
# -*-coding:utf-8 -*-
"""Pygraph - ged module

Implement some methods to compute ged between graphs


"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

from pygraph.ged import costfunctions
from pygraph.ged import bipartiteGED
from pygraph.ged import GED


ged/bipartiteGED.py → pygraph/ged/bipartiteGED.py View File

@@ -1,9 +1,9 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ged.costfunctions import BasicCostFunction
from pygraph.ged.costfunctions import ConstantCostFunction


def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)):
def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
"""Compute a Cost Matrix according to cost function cf"""
n = G1.number_of_nodes()
m = G2.number_of_nodes()
@@ -11,23 +11,23 @@ def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)):
C = np.ones([nm, nm])*np.inf
C[n:, m:] = 0

for u in G1.nodes_iter():
for v in G2.nodes_iter():
for u in G1.nodes():
for v in G2.nodes():
cost = cf.cns(u, v, G1, G2)
C[u, v] = cost

for v in G1.nodes_iter():
for v in G1.nodes():
C[v, m + v] = cf.cnd(v, G1)

for v in G2.nodes_iter():
for v in G2.nodes():
C[n + v, v] = cf.cni(v, G2)
return C


def getOptimalMapping(C):
def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
"""Compute an optimal linear mapping according to cost Matrix C
inclure les progs C de Seb

"""
row_ind, col_ind = linear_sum_assignment(C)
row_ind, col_ind = lsap_solver(C)
return col_ind, row_ind[np.argsort(col_ind)]

ged/costfunctions.py → pygraph/ged/costfunctions.py View File

@@ -2,15 +2,17 @@ import numpy as np
from scipy.optimize import linear_sum_assignment


class BasicCostFunction:
class ConstantCostFunction:
""" Define a symmetric constant cost fonction for edit operations """
def __init__(self, cns, cni, ces, cei):
self.cns_ = cns
self.cni_ = self.cnd_ = cni
self.ces_ = ces
self.cei_ = self.ced_ = cei

def cns(self, u, v, G1, G2):
return (G1.node[u]['label'] != G2.node[v]['label'])*self.cns_
def cns(self, node_u, node_v, g1, g2):
""" return substitution edit operation cost between node_u of G1 and node_v of G2"""
return (g1.node[node_u]['label'] != g2.node[node_v]['label'])*self.cns_

def cnd(self, u, G1):
return self.cnd_
@@ -30,9 +32,11 @@ class BasicCostFunction:
return self.cei_


class RiesenCostFunction(BasicCostFunction):
def __init__(self, cf):
BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)
class RiesenCostFunction():
""" Cost function associated to the computation of a cost matrix between nodes for LSAP"""
def __init__(self, cf, lsap_solver=linear_sum_assignment):
self.cf_ = cf
self.lsap_solver_ = lsap_solver

def cns(self, u, v, G1, G2):
""" u et v sont des id de noeuds """
@@ -48,41 +52,43 @@ class RiesenCostFunction(BasicCostFunction):
e1 = [u, nbr_u, G1[u][nbr_u]]
for nbr_v in G2[v]:
e2 = [v, nbr_v, G2[v][nbr_v]]
sub_C[i, j] = self.ces(e1, e2, G1, G2)
sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
j += 1
i += 1

i = 0
for nbr_u in l_nbr_u:
sub_C[i, m+i] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)
sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
i += 1

j = 0
for nbr_v in l_nbr_v:
sub_C[n+j, j] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)
sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
j += 1
row_ind, col_ind = linear_sum_assignment(sub_C)
row_ind, col_ind = self.lsap_solver_(sub_C)
cost = np.sum(sub_C[row_ind, col_ind])
return BasicCostFunction.cns(self, u, v, G1, G2) + cost
return self.cf_.cns(u, v, G1, G2) + cost

def cnd(self, u, G1):
cost = 0
for nbr in G1[u]:
cost += BasicCostFunction.ced(self,[u,nbr,G1[u][nbr]],G1)
cost += self.cf_.ced([u,nbr,G1[u][nbr]],G1)
return BasicCostFunction.cnd(self,u,G1) + cost
return self.cf_.cnd(u,G1) + cost

def cni(self, v, G2):
cost = 0
for nbr in G2[v]:
cost += BasicCostFunction.cei(self, [v,nbr,G2[v][nbr]], G2)
cost += self.cf_.cei([v,nbr,G2[v][nbr]], G2)
return BasicCostFunction.cni(self, v, G2) + cost
return self.cf_.cni(v, G2) + cost


class NeighboorhoodCostFunction(BasicCostFunction):
def __init__(self, cf):
BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)
class NeighboorhoodCostFunction():
""" Cost function associated to the computation of a cost matrix between nodes for LSAP"""
def __init__(self, cf, lsap_solver=linear_sum_assignment):
self.cf_ = cf
self.lsap_solver_ = lsap_solver

def cns(self, u, v, G1, G2):
""" u et v sont des id de noeuds """
@@ -98,36 +104,35 @@ class NeighboorhoodCostFunction(BasicCostFunction):
e1 = [u, nbr_u, G1[u][nbr_u]]
for nbr_v in G2[v]:
e2 = [v, nbr_v, G2[v][nbr_v]]
sub_C[i, j] = self.ces(e1, e2, G1, G2)
sub_C[i, j] += BasicCostFunction.cns(self,
nbr_u, nbr_v, G1, G2)
sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2)
j += 1
i += 1

i = 0
for nbr_u in l_nbr_u:
sub_C[i, m+i] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)
sub_C[i, m+i] += BasicCostFunction.cnd(self, nbr_u, G1)
sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
sub_C[i, m+i] += self.cf_.cnd(nbr_u, G1)
i += 1

j = 0
for nbr_v in l_nbr_v:
sub_C[n+j, j] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)
sub_C[n+j, j] += BasicCostFunction.cni(self, nbr_v, G2)
sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
sub_C[n+j, j] += self.cf_.cni(nbr_v, G2)
j += 1

row_ind, col_ind = linear_sum_assignment(sub_C)
row_ind, col_ind = self.lsap_solver_(sub_C)
cost = np.sum(sub_C[row_ind, col_ind])
return BasicCostFunction.cns(self, u, v, G1, G2) + cost
return self.cf_.cns(u, v, G1, G2) + cost

def cnd(self, u, G1):
cost = 0
for nbr in G1[u]:
cost += BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1)
return BasicCostFunction.cnd(self, u, G1) + cost
cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
return self.cf_.cnd(u, G1) + cost

def cni(self, v, G2):
cost = 0
for nbr in G2[v]:
cost += BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2)
return BasicCostFunction.cni(self, v, G2) + cost
cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
return self.cf_.cni(v, G2) + cost

kernels/.gitignore → pygraph/kernels/.gitignore View File


+ 68
- 0
pygraph/kernels/spkernel.py View File

@@ -0,0 +1,68 @@
import sys
import pathlib
sys.path.insert(0, "../")


import networkx as nx
import numpy as np
import time

from utils.utils import getSPGraph


def _count_matching_sp_edges(S1, S2):
    """Count edge pairs (one per graph) with equal endpoints and equal non-zero cost."""
    count = 0
    for u1, v1, data1 in S1.edges(data=True):
        cost1 = data1['cost']
        if cost1 == 0:  # zero-length paths (self-loops) never contribute
            continue
        for u2, v2, data2 in S2.edges(data=True):
            same_endpoints = (u1 == u2 and v1 == v2) or (u1 == v2 and v1 == u2)
            if cost1 == data2['cost'] and same_endpoints:
                count += 1
    return count


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2 graphs. / SP Kernel between 2 graphs.

    Notes
    -----
    In both call forms the input graphs are first turned into shortest-path
    graphs with ``getSPGraph``. The reported time covers only the edge
    comparison, not that transformation.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]
        Kmatrix = np.zeros((len(Gn), len(Gn)))
        Sn = [getSPGraph(G) for G in Gn]  # shortest-path graphs of Gn

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                # The kernel is symmetric: compute the upper triangle once
                # and mirror it.
                count = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] = count
                Kmatrix[j][i] = count

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
        return Kmatrix
    else:  # for only 2 graphs
        # Transform the inputs as the list branch does; the raw graphs do not
        # carry the 'cost' edge attribute the comparison relies on.
        G1 = getSPGraph(args[0])
        G2 = getSPGraph(args[1])

        # start_time was previously never set on this branch, so the final
        # print raised NameError.
        start_time = time.time()
        kernel = _count_matching_sp_edges(G1, G2)

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))
        return kernel

+ 17
- 0
pygraph/utils/__init__.py View File

@@ -0,0 +1,17 @@
# -*-coding:utf-8 -*-
"""Pygraph - utils module

Implement some methods to manage graphs
graphfiles.py : load .gxl and .ct files
utils.py : compute some properties on networkX graphs


"""

# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"

from pygraph.utils import graphfiles
from pygraph.utils import utils

utils/graphfiles.py → pygraph/utils/graphfiles.py View File

@@ -1,13 +1,25 @@
import networkx as nx


def loadCT(filename):
"""load data from .ct file.
Notes
------
a typical example of data in .ct is like this:
3 2 <- number of nodes and edges
0.0000 0.0000 0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element @Q what are the first 3 numbers?
0.0000 0.0000 0.0000 C
0.0000 0.0000 0.0000 O
1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. @Q what are the 3th numbers?
2 3 1 1
"""
content = open(filename).read().splitlines()
G = nx.Graph(name=str(content[0]))
G = nx.Graph(name=str(content[0])) # set name of the graph
tmp = content[1].split(" ")
if tmp[0] == '':
nb_nodes = int(tmp[1])
nb_edges = int(tmp[2])
nb_nodes = int(tmp[1]) # number of the nodes
nb_edges = int(tmp[2]) # number of the edges
else:
nb_nodes = int(tmp[0])
nb_edges = int(tmp[1])
@@ -18,7 +30,7 @@ def loadCT(filename):
G.add_node(i, label=tmp[3])

for i in range(0, nb_edges):
tmp = content[i+G.number_of_nodes()+2].split(" ")
tmp = content[i + G.number_of_nodes() + 2].split(" ")
tmp = [x for x in tmp if x != '']
G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3]))
return G
@@ -43,9 +55,10 @@ def loadGXL(filename):
label = edge.find('attr')[0].text
G.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], label=label)
return G


def loadDataset(filename):
"""load file list of the dataset.
"""
from os.path import dirname, splitext

dirname_dataset = dirname(filename)
@@ -56,7 +69,7 @@ def loadDataset(filename):
content = open(filename).read().splitlines()
for i in range(0, len(content)):
tmp = content[i].split(' ')
data.append(loadCT(dirname_dataset + '/' + tmp[0]))
data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the '#'s in file names
y.append(float(tmp[1]))
elif(extension == "cxl"):
import xml.etree.ElementTree as ET

+ 59
- 0
pygraph/utils/utils.py View File

@@ -0,0 +1,59 @@
import networkx as nx
import numpy as np


def getSPLengths(G1):
    """Return the matrix of shortest-path lengths (in edges) of graph G1.

    Parameters
    ----------
    G1 : NetworkX graph
        Graph whose pairwise shortest-path lengths are computed. Node
        identifiers are used directly as matrix indices, so they are assumed
        to be the integers 0..n-1 -- TODO confirm against the dataset loaders.

    Return
    ------
    distances : Numpy array
        Square array where ``distances[i, j]`` is the number of edges on the
        shortest path from i to j. Pairs for which no path is reported keep
        the initial value 0.
    """
    # dict() accepts both a mapping and an iterable of (source, paths) pairs,
    # whichever form the installed networkx returns.
    sp = dict(nx.shortest_path(G1))
    distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
    # The original iterated over ``np`` (the numpy module) instead of ``sp``,
    # which raised AttributeError on every call.
    for i, paths in sp.items():
        for j, path in paths.items():
            distances[i, j] = len(path) - 1  # a path lists nodes; edges = nodes - 1
    return distances

def getSPGraph(G):
    """Build the shortest-paths graph of G.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G.

    Notes
    ------
    For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.

    The work is delegated to ``floydTransformation``.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    sp_graph = floydTransformation(G)
    return sp_graph
def floydTransformation(G):
    """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed. Presumably undirected, since the result
        is built as an undirected ``nx.Graph`` -- TODO confirm.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G. It has the same nodes
        (with their attributes) as G and one edge, carrying the shortest
        distance in its ``cost`` attribute, for every pair of distinct nodes
        with a finite shortest-path distance.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    # Fix the node order explicitly so matrix row/column k can be mapped back
    # to the actual node identifier, even when nodes are not labelled 0..n-1.
    nodes = list(G.nodes())
    spMatrix = nx.floyd_warshall_numpy(G, nodelist=nodes)  # @todo weight label not considered
    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))
    for i in range(len(nodes)):
        # S is undirected: visiting the strict upper triangle adds each pair
        # once and skips the zero-cost self-loops.
        for j in range(i + 1, len(nodes)):
            cost = spMatrix[i, j]
            # An infinite distance means there is no path between the two
            # nodes, so no edge is added.
            if not np.isinf(cost):
                S.add_edge(nodes[i], nodes[j], cost=cost)
    return S

+ 5
- 0
tests/README.md View File

@@ -0,0 +1,5 @@
To use the library :
$> virtualenv --python=/usr/bin/python3.5 venv
$> source venv/bin/activate
$> pip install -r requirements.txt
... Go use pygraph

+ 66
- 0
tests/opt.py View File

@@ -0,0 +1,66 @@
import ot
import sys
import pathlib
sys.path.insert(0, "../")

from pygraph.utils.graphfiles import loadDataset
from pygraph.ged.costfunctions import ConstantCostFunction
from pygraph.utils.utils import getSPLengths
from tqdm import tqdm
import numpy as np
from scipy.optimize import linear_sum_assignment
from pygraph.ged.GED import ged
import scipy

def pad(C, n):
    """Embed matrix C in the top-left corner of an n x n zero matrix.

    Parameters
    ----------
    C : Numpy array
        2-D array to embed.
    n : int
        Side length of the square result.

    Return
    ------
    Numpy array
        Float array of shape (n, n) equal to C on its top-left block and 0
        everywhere else.
    """
    rows, cols = C.shape[0], C.shape[1]
    padded = np.zeros((n, n))
    padded[:rows, :cols] = C
    return padded

# Script entry point: on every pair of equally sized graphs of the dataset,
# compare the GED obtained with the default ged() mapping against the GED
# obtained from a node mapping derived from a Gromov-Wasserstein coupling.
# NOTE(review): the indentation of this block was lost in the diff view and
# has been reconstructed; confirm the nesting (in particular the final
# ged_distances.append) against the repository.
if (__name__ == "__main__"):
    # Machine-specific dataset location.
    ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds"
    dataset, y = loadDataset(ds_filename)
    cf = ConstantCostFunction(1, 3, 1, 3)
    N = len(dataset)

    pairs = list()
    ged_distances = list() #np.zeros((N, N))
    gw_distances = list() #np.zeros((N, N))
    for i in tqdm(range(0, N)):
        for j in tqdm(range(i, N)):
            G1 = dataset[i]
            G2 = dataset[j]
            n = G1.number_of_nodes()
            m = G2.number_of_nodes()
            # Only graphs with the same number of nodes are compared.
            if(n == m):
                C1 = getSPLengths(G1)
                C2 = getSPLengths(G2)

                # Normalise each shortest-path length matrix by its maximum.
                C1 /= C1.max()
                C2 /= C2.max()

                # With the n == m guard above, neither padding branch can run.
                dim = max(n, m)
                if(n < m):
                    C1 = pad(C1, dim)
                elif (m < n):
                    C2 = pad(C2, dim)

                # Uniform weights over the nodes of each graph.
                p = ot.unif(dim)
                q = ot.unif(dim)

                gw = ot.gromov_wasserstein(C1, C2, p, q,
                                           'square_loss', epsilon=5e-3)
                # linear_sum_assignment minimises, so the matrix is negated
                # to pick the assignment with the largest total gw value.
                row_ind, col_ind = linear_sum_assignment(-gw)
                rho = col_ind
                varrho = row_ind[np.argsort(col_ind)]
                pairs.append((i,j))
                gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])

                # Reference value: GED with the mapping ged() computes itself.
                ged_distances.append(ged(G1, G2, cf=cf)[0])

    print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
    print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

    np.save("distances_riesen", ged_distances)
    np.save("distances_gw", gw_distances)

+ 16
- 0
tests/requirements.txt View File

@@ -0,0 +1,16 @@
cycler==0.10.0
Cython==0.27.3
decorator==4.1.2
matplotlib==2.1.0
networkx==2.0
numpy==1.13.3
pkg-resources==0.0.0
POT==0.4.0
pyparsing==2.2.0
python-dateutil==2.6.1
pytz==2017.3
scikit-learn==0.19.1
scipy==1.0.0
six==1.11.0
sklearn==0.0
tqdm==4.19.4

+ 0
- 10
utils/utils.py View File

@@ -1,10 +0,0 @@
import networkx as nx
import numpy as np


def getSPLengths(G1):
sp = nx.shortest_path(G1)
distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
for i in np.keys():
for j in np[i].keys():
distances[i, j] = len(sp[i][j])-1

Loading…
Cancel
Save