@@ -0,0 +1,170 @@ | |||||
{ | |||||
"cells": [ | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 1, | |||||
"metadata": { | |||||
"autoscroll": false, | |||||
"ein.tags": "worksheet-0", | |||||
"slideshow": { | |||||
"slide_type": "-" | |||||
} | |||||
}, | |||||
"outputs": [], | |||||
"source": [ | |||||
"import numpy as np\n", | |||||
"import paths\n", | |||||
"\n", | |||||
"import pygraph\n", | |||||
"\n", | |||||
"from pygraph.utils.graphfiles import loadDataset\n" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 2, | |||||
"metadata": { | |||||
"autoscroll": false, | |||||
"ein.tags": "worksheet-0", | |||||
"slideshow": { | |||||
"slide_type": "-" | |||||
} | |||||
}, | |||||
"outputs": [], | |||||
"source": [ | |||||
"import networkx as nx\n", | |||||
"import numpy as np\n", | |||||
"import matplotlib.pyplot as plt\n", | |||||
"\n", | |||||
"# We load a ds dataset\n", | |||||
"# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n", | |||||
"dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 3, | |||||
"metadata": { | |||||
"autoscroll": false, | |||||
"ein.tags": "worksheet-0", | |||||
"slideshow": { | |||||
"slide_type": "-" | |||||
} | |||||
}, | |||||
"outputs": [ | |||||
{ | |||||
"name": "stderr", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n", | |||||
"100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n", | |||||
"100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n", | |||||
"100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n" | |||||
] | |||||
} | |||||
], | |||||
"source": [ | |||||
"#Compute graph edit distances\n", | |||||
"\n", | |||||
"from tqdm import tqdm\n", | |||||
"from pygraph.c_ext.lsape_binders import lsap_solverHG\n", | |||||
"from pygraph.ged.costfunctions import ConstantCostFunction\n", | |||||
"from pygraph.ged.GED import ged\n", | |||||
"import time\n", | |||||
"\n", | |||||
"cf = ConstantCostFunction(1,3,1,3)\n", | |||||
"N=len(dataset)\n", | |||||
"\n", | |||||
"methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n", | |||||
"ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n", | |||||
"\n", | |||||
"times = list()\n", | |||||
"start = time.clock()\n", | |||||
"for i in tqdm(range(0,N)):\n", | |||||
" for j in range(0,N):\n", | |||||
" ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", | |||||
"times.append(time.clock() - start)\n", | |||||
"\n", | |||||
"\n", | |||||
"start = time.clock()\n", | |||||
"for i in tqdm(range(0,N)):\n", | |||||
" for j in range(0,N):\n", | |||||
" ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n", | |||||
"\n", | |||||
"times.append(time.clock() - start)\n", | |||||
"\n", | |||||
"start = time.clock()\n", | |||||
"for i in tqdm(range(0,N)):\n", | |||||
" for j in range(0,N):\n", | |||||
" ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n", | |||||
"times.append(time.clock() - start)\n", | |||||
"\n", | |||||
"start = time.clock()\n", | |||||
"for i in tqdm(range(0,N)):\n", | |||||
" for j in range(0,N):\n", | |||||
" ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n", | |||||
"times.append(time.clock() - start)" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 5, | |||||
"metadata": { | |||||
"autoscroll": false, | |||||
"ein.tags": "worksheet-0", | |||||
"slideshow": { | |||||
"slide_type": "-" | |||||
} | |||||
}, | |||||
"outputs": [ | |||||
{ | |||||
"name": "stdout", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
" method \t mean \t mean \t time\n", | |||||
" Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n", | |||||
" Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n", | |||||
" Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n", | |||||
" Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n" | |||||
] | |||||
} | |||||
], | |||||
"source": [ | |||||
"print(\" method \\t mean \\t mean \\t time\")\n", | |||||
"data = list()\n", | |||||
"for i in range(0,len(ged_distances)):\n", | |||||
" ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n", | |||||
" print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": null, | |||||
"metadata": {}, | |||||
"outputs": [], | |||||
"source": [] | |||||
} | |||||
], | |||||
"metadata": { | |||||
"kernelspec": { | |||||
"display_name": "Python 3", | |||||
"language": "python", | |||||
"name": "python3" | |||||
}, | |||||
"language_info": { | |||||
"codemirror_mode": { | |||||
"name": "ipython", | |||||
"version": 3 | |||||
}, | |||||
"file_extension": ".py", | |||||
"mimetype": "text/x-python", | |||||
"name": "python", | |||||
"nbconvert_exporter": "python", | |||||
"pygments_lexer": "ipython3", | |||||
"version": "3.6.2" | |||||
}, | |||||
"name": "py-graph_test.ipynb" | |||||
}, | |||||
"nbformat": 4, | |||||
"nbformat_minor": 2 | |||||
} |
@@ -0,0 +1,21 @@ | |||||
# -*-coding:utf-8 -*- | |||||
""" | |||||
Pygraph | |||||
This package contains 5 sub packages :
* c_ext : binders to C++ code | |||||
* ged : allows to compute graph edit distance between networkX graphs | |||||
* kernels : computation of graph kernels, ie graph similarity measure compatible with SVM | |||||
* notebooks : examples of code using this library | |||||
* utils : Diverse computation on graphs | |||||
""" | |||||
# info | |||||
__version__ = "0.1" | |||||
__author__ = "Benoit Gaüzère" | |||||
__date__ = "November 2017" | |||||
# import sub modules | |||||
from pygraph import c_ext | |||||
from pygraph import ged | |||||
from pygraph import utils |
@@ -0,0 +1,5 @@ | |||||
# Build liblsap.so, the shared library loaded by lsape_binders.py.
# You must set LSAPE_DIR to the directory containing the lsape headers, e.g.
#LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/
ifndef LSAPE_DIR
$(error LSAPE_DIR is not set: point it to the lsape include directory)
endif

liblsap.so: lsap.cpp
	g++ -fPIC -shared -O3 -I$(LSAPE_DIR) lsap.cpp -o liblsap.so
@@ -0,0 +1,6 @@ | |||||
Python wrapper for the LSAPE solvers.
Before compiling, set the LSAPE_DIR environment variable to the location
of the LSAPE source code.
Source code: https://bougleux.users.greyc.fr/lsape/
@@ -0,0 +1,17 @@ | |||||
# -*-coding:utf-8 -*- | |||||
"""Pygraph - c_ext module | |||||
This package binds some C++ code to python | |||||
lsape_binders.py : binders to C++ code of LSAPE methods implemented in | |||||
https://bougleux.users.greyc.fr/lsape/ | |||||
""" | |||||
# info | |||||
__version__ = "0.1" | |||||
__author__ = "Benoit Gaüzère" | |||||
__date__ = "November 2017" | |||||
# import sub modules | |||||
from pygraph.c_ext import lsape_binders |
@@ -0,0 +1,43 @@ | |||||
/* | |||||
Python wrapper | |||||
*/ | |||||
#include "hungarian-lsape.hh" | |||||
#include "hungarian-lsap.hh" | |||||
#include <cstdio> | |||||
/*
 * C entry point: solve a square (nm x nm) assignment problem on cost matrix C
 * by delegating to hungarianLSAP (hungarian-lsap.hh) and copy the resulting
 * assignments into the caller-provided arrays rho and varrho, which must each
 * hold nm entries. Always returns 0.
 *
 * NOTE(review): rho/varrho are presumably the row->column and column->row
 * assignments computed by hungarianLSAP; confirm against that header.
 */
extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){
  // Buffers required by hungarianLSAP; u and v are not returned to the caller.
  double * u = new double[nm];
  double * v = new double[nm];

  // The solver works on int arrays while the caller hands us long arrays.
  int * rho_int = new int[nm];
  int * varrho_int = new int[nm];

  hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int);

  // Widen the int results into the caller's long buffers.
  for (int i = 0; i < nm; i++){
    rho[i] = (long)(rho_int[i]);
    varrho[i] = (long)(varrho_int[i]);
  }

  // Release the temporaries: they were previously leaked on every call.
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}
/*
 * C entry point: solve an assignment problem with edition on the n x m cost
 * matrix C by delegating to hungarianLSAPE (hungarian-lsape.hh). The results
 * are copied into the caller-provided arrays rho (n entries) and varrho
 * (m entries).
 *
 * The declared return type is kept as int* for ABI compatibility; the
 * function always returns a null pointer.
 */
extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){
  // Buffers required by hungarianLSAPE; u and v are not returned to the caller.
  double * u = new double[n];
  double * v = new double[m];

  // The solver works on int arrays while the caller hands us long arrays.
  int * rho_int = new int[n];
  int * varrho_int = new int[m];

  hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v);

  // Widen the int results into the caller's long buffers.
  for (int i = 0; i < n; i++)
    rho[i] = (long)(rho_int[i]);
  for (int i = 0; i < m; i++)
    varrho[i] = (long)(varrho_int[i]);

  // Release the temporaries: they were previously leaked on every call.
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}
@@ -0,0 +1,23 @@ | |||||
import numpy as np | |||||
import ctypes as c | |||||
from ctypes import cdll | |||||
import os.path | |||||
def lsap_solverHG(C):
    """Solve a square assignment problem with the C++ Hungarian solver.

    Binding for the ``lsap`` entry point of ``liblsap.so``, which is loaded
    from the directory containing this module.

    Parameters
    ----------
    C : numpy.ndarray
        Square cost matrix; its first dimension gives the problem size.
        ``+inf`` entries are replaced by 10000 in a private copy before the
        solver runs, so the caller's array is left untouched.

    Returns
    -------
    row_ind, col_ind : numpy.ndarray
        ``row_ind`` is ``0 .. nm-1`` and ``col_ind`` holds the values the
        library wrote into its ``varrho`` output, following the
        ``(row_ind, col_ind)`` convention of
        ``scipy.optimize.linear_sum_assignment``.
    """
    nm = C.shape[0]
    dll_name = 'liblsap.so'
    lib = cdll.LoadLibrary(os.path.abspath(
        os.path.join(os.path.dirname(__file__), dll_name)))
    # Declare the C signature so ctypes checks/converts every argument.
    lib.lsap.restype = c.c_int
    lib.lsap.argtypes = [c.POINTER(c.c_double), c.c_int,
                         c.POINTER(c.c_long), c.POINTER(c.c_long)]

    # Work on a contiguous float64 copy: the C code reads raw doubles, and the
    # replacement of infinite costs must not leak into the caller's matrix.
    # The row-major buffer is handed over as-is, exactly like the previous
    # ``C.transpose().ctypes.data`` did (transposing a view moves no data).
    # NOTE(review): the solver presumably reads this buffer column-major, i.e.
    # it sees C transposed, which would explain why its ``varrho`` output is
    # the column assigned to each row of C -- confirm against the lsape headers.
    costs = np.array(C, dtype=np.float64, order='C')
    costs[costs == np.inf] = 10000

    # 'l' is the C ``long`` type expected by the wrapper on every platform.
    rho = np.zeros(nm, dtype='l')
    varrho = np.zeros(nm, dtype='l')

    lib.lsap(costs.ctypes.data_as(c.POINTER(c.c_double)),
             nm,
             rho.ctypes.data_as(c.POINTER(c.c_long)),
             varrho.ctypes.data_as(c.POINTER(c.c_long)))

    return np.arange(nm), varrho.astype(int)
@@ -0,0 +1,72 @@ | |||||
from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction | |||||
from pygraph.ged.costfunctions import NeighboorhoodCostFunction | |||||
from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||||
from scipy.optimize import linear_sum_assignment | |||||
def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=ConstantCostFunction(1, 3, 1, 3),
        solver=linear_sum_assignment):
    """Compute the graph edit distance between G1 and G2 induced by a mapping.

    The mapping is encoded by ``rho`` and ``varrho``. Node identifiers are
    used directly as indices into these two sequences.
    NB: use dictionaries instead, to be more versatile?

    Parameters
    ----------
    G1, G2 : graphs
        Graphs to compare.
    method : str
        Bipartite cost used when no mapping is supplied: ``'Riesen'``,
        ``'Neighboorhood'`` or ``'Basic'`` (``cf`` itself).
    rho, varrho : sequences or None
        ``rho[i]`` is the image of node ``i`` of G1; a value ``>= m`` (number
        of nodes of G2) means the node is deleted. ``varrho[j]`` is the image
        of node ``j`` of G2; a value ``>= n`` (number of nodes of G1) means
        the node is inserted. When either is None, both are computed with
        ``getOptimalMapping``.
    cf : cost function
        Object providing ``cns``, ``cnd``, ``cni``, ``ces``, ``ced``, ``cei``.
    solver : callable
        Assignment solver forwarded to the bipartite cost functions and to
        ``getOptimalMapping``.

    Returns
    -------
    (cost, rho, varrho)
        The edit cost of the mapping and the mapping itself.

    Raises
    ------
    NameError
        If ``method`` is not one of the supported names.
    """
    if rho is None or varrho is None:
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf, lsap_solver=solver)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf, lsap_solver=solver)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    cost = 0

    # Nodes of G1: substituted when mapped onto a node of G2, deleted otherwise.
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            cost += cf.cnd(i, G1)
        else:
            cost += cf.cns(i, phi_i, G1, G2)

    # Nodes of G2 with no pre-image in G1 are inserted.
    for j in G2.nodes():
        if varrho[j] >= n:
            cost += cf.cni(j, G2)

    # Edges of G1: substituted when both endpoints are mapped and the image
    # edge exists in G2 (or deleted + re-inserted if that is cheaper),
    # deleted otherwise.
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if phi_i < m and phi_j < m and phi_j in G2[phi_i]:
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            cost += min(cf.ces(e, e2, G1, G2),
                        cf.ced(e, G1) + cf.cei(e2, G2))
        else:
            cost += cf.ced(e, G1)

    # Edges of G2 that are not the image of an edge of G1 are inserted.
    # Edges whose pre-image exists were already accounted for above.
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if phi_i < n and phi_j < n:
            if phi_j not in G1[phi_i]:
                cost += cf.cei(e, G2)
        else:
            # An endpoint of this G2 edge is an inserted node, so the edge is
            # inserted as well (this used to be charged as a deletion).
            cost += cf.cei(e, G2)

    return cost, rho, varrho
@@ -0,0 +1,17 @@ | |||||
# -*-coding:utf-8 -*- | |||||
"""Pygraph - ged module | |||||
Implement some methods to compute ged between graphs | |||||
""" | |||||
# info | |||||
__version__ = "0.1" | |||||
__author__ = "Benoit Gaüzère" | |||||
__date__ = "November 2017" | |||||
from pygraph.ged import costfunctions | |||||
from pygraph.ged import bipartiteGED | |||||
from pygraph.ged import GED | |||||
@@ -0,0 +1,33 @@ | |||||
import numpy as np | |||||
from scipy.optimize import linear_sum_assignment | |||||
from pygraph.ged.costfunctions import ConstantCostFunction | |||||
def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
    """Build the (n+m) x (n+m) bipartite cost matrix of G1 and G2 under cf.

    With n and m the node counts of G1 and G2, the matrix is laid out as:
    top-left n x m block = substitution costs, diagonal of the top-right
    block = deletion costs, diagonal of the bottom-left block = insertion
    costs, bottom-right block = 0. Every other entry is +inf.
    Node identifiers are used directly as row/column indices.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    size = n + m

    C = np.full((size, size), np.inf)
    C[n:, m:] = 0

    # Substitutions: every node of G1 against every node of G2.
    for node1 in G1.nodes():
        for node2 in G2.nodes():
            C[node1, node2] = cf.cns(node1, node2, G1, G2)

    # Deletions of G1 nodes.
    for node1 in G1.nodes():
        C[node1, m + node1] = cf.cnd(node1, G1)

    # Insertions of G2 nodes.
    for node2 in G2.nodes():
        C[n + node2, node2] = cf.cni(node2, G2)

    return C
def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
    """Compute an optimal linear mapping for cost matrix C.

    ``lsap_solver`` must return ``(row_ind, col_ind)``. The function returns
    ``(rho, varrho)`` where ``rho`` is ``col_ind`` and ``varrho`` is
    ``row_ind`` reordered by increasing column index.
    TODO: include Seb's C programs.
    """
    rows, cols = lsap_solver(C)
    by_column = np.argsort(cols)
    varrho = rows[by_column]
    return cols, varrho
@@ -0,0 +1,138 @@ | |||||
import numpy as np | |||||
from scipy.optimize import linear_sum_assignment | |||||
class ConstantCostFunction:
    """Symmetric constant cost function for edit operations.

    Node insertion and deletion share one cost, and so do edge insertion and
    deletion. Substitutions cost a constant when the 'label' attributes
    differ and 0 when they are equal.
    """

    def __init__(self, cns, cni, ces, cei):
        """Store the node substitution / insertion-deletion costs (cns, cni)
        and the edge substitution / insertion-deletion costs (ces, cei)."""
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    def cns(self, node_u, node_v, g1, g2):
        """Return the substitution cost between node_u of g1 and node_v of g2."""
        # ``Graph.nodes[...]`` replaces ``Graph.node[...]``, which was removed
        # from networkx in 2.4; ``nodes`` is available from networkx 2.0 on.
        return (g1.nodes[node_u]['label'] != g2.nodes[node_v]['label'])*self.cns_

    def cnd(self, u, G1):
        """Return the (constant) cost of deleting node u of G1."""
        return self.cnd_

    def cni(self, v, G2):
        """Return the (constant) cost of inserting node v of G2."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Return the substitution cost between edges e1 and e2.

        Edges are ``(u, v, attributes)`` triples; only the 'label' attribute
        is compared.
        TODO: test with non-symbolic attributes by relying on ``__eq__``.
        """
        return (e1[2]['label'] != e2[2]['label'])*self.ces_

    def ced(self, e1, G1):
        """Return the (constant) cost of deleting edge e1 of G1."""
        return self.ced_

    def cei(self, e2, G2):
        """Return the (constant) cost of inserting edge e2 of G2."""
        return self.cei_
class RiesenCostFunction():
    """Node cost function used to fill a node-to-node cost matrix for LSAP.

    Each node cost is the cost given by the wrapped cost function ``cf`` plus
    a cost computed from the edges incident to the node(s).
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Substitution cost of node u (in G1) by node v (in G2).

        u and v are node identifiers. The incident edges of u and v are
        matched through an assignment problem solved with ``lsap_solver_``;
        the cost of that assignment is added to ``cf.cns(u, v)``.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        sub_C = np.ones([n+m, n+m]) * np.inf
        sub_C[n:, m:] = 0

        # Edge substitutions.
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for j, nbr_v in enumerate(G2[v]):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)

        # Edge deletions.
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)

        # Edge insertions.
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = self.lsap_solver_(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Deletion cost of node u: ``cf.cnd`` plus the deletion of each incident edge."""
        edges_cost = 0
        for nbr in G1[u]:
            incident_edge = [u, nbr, G1[u][nbr]]
            edges_cost += self.cf_.ced(incident_edge, G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of node v: ``cf.cni`` plus the insertion of each incident edge."""
        edges_cost = 0
        for nbr in G2[v]:
            incident_edge = [v, nbr, G2[v][nbr]]
            edges_cost += self.cf_.cei(incident_edge, G2)
        return self.cf_.cni(v, G2) + edges_cost
class NeighboorhoodCostFunction():
    """Node cost function used to fill a node-to-node cost matrix for LSAP.

    Same construction as the incident-edge matching of ``cns`` below, except
    that each edge cost is augmented with the cost of the neighbour node at
    the other end of the edge.
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Substitution cost of node u (in G1) by node v (in G2).

        u and v are node identifiers. Incident edges, together with the
        neighbour they lead to, are matched through an assignment problem
        solved with ``lsap_solver_``; the cost of that assignment is added to
        ``cf.cns(u, v)``.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        sub_C = np.ones([n+m, n+m]) * np.inf
        sub_C[n:, m:] = 0

        # Edge substitution + neighbour substitution.
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for j, nbr_v in enumerate(G2[v]):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
                sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2)

        # Edge deletion + neighbour deletion.
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
            sub_C[i, m+i] += self.cf_.cnd(nbr_u, G1)

        # Edge insertion + neighbour insertion.
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
            sub_C[n+j, j] += self.cf_.cni(nbr_v, G2)

        rows, cols = self.lsap_solver_(sub_C)
        neighbourhood_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + neighbourhood_cost

    def cnd(self, u, G1):
        """Deletion cost of node u: ``cf.cnd`` plus the deletion of each incident edge."""
        edges_cost = 0
        for nbr in G1[u]:
            incident_edge = [u, nbr, G1[u][nbr]]
            edges_cost += self.cf_.ced(incident_edge, G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of node v: ``cf.cni`` plus the insertion of each incident edge."""
        edges_cost = 0
        for nbr in G2[v]:
            incident_edge = [v, nbr, G2[v][nbr]]
            edges_cost += self.cf_.cei(incident_edge, G2)
        return self.cf_.cni(v, G2) + edges_cost
@@ -0,0 +1,68 @@ | |||||
import sys | |||||
import pathlib | |||||
sys.path.insert(0, "../") | |||||
import networkx as nx | |||||
import numpy as np | |||||
import time | |||||
from utils.utils import getSPGraph | |||||
def _count_matching_sp_edges(S1, S2):
    """Count the pairs (e1, e2) of edges of S1 and S2 that have the same
    non-zero 'cost' and the same endpoints (in either orientation)."""
    matches = 0
    for e1 in S1.edges(data = True):
        for e2 in S2.edges(data = True):
            if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                matches += 1
    return matches


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
        NOTE(review): in this form the 'cost' edge attribute is read directly
        from G1 and G2 (no shortest-path transformation is applied), so the
        arguments presumably have to be shortest-path graphs already --
        confirm against callers.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2 graphs. / SP Kernel between 2 graphs.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1: # for a list of graphs
        Gn = args[0]

        Kmatrix = np.zeros((len(Gn), len(Gn)))

        # get shortest path graphs of Gn
        Sn = [getSPGraph(G) for G in Gn]

        start_time = time.time()
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                # the kernel is symmetric: fill both triangles at once
                matches = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] = matches
                Kmatrix[j][i] = matches

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    else: # for only 2 graphs
        G1 = args[0]
        G2 = args[1]

        # start_time used to be undefined in this branch, so the timing
        # message below raised a NameError.
        start_time = time.time()
        kernel = _count_matching_sp_edges(G1, G2)

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

        return kernel
@@ -0,0 +1,17 @@ | |||||
# -*-coding:utf-8 -*- | |||||
"""Pygraph - utils module | |||||
Implement some methods to manage graphs | |||||
graphfiles.py : load .gxl and .ct files | |||||
utils.py : compute some properties on networkX graphs | |||||
""" | |||||
# info | |||||
__version__ = "0.1" | |||||
__author__ = "Benoit Gaüzère" | |||||
__date__ = "November 2017" | |||||
from pygraph.utils import graphfiles | |||||
from pygraph.utils import utils |
@@ -0,0 +1,87 @@ | |||||
import networkx as nx | |||||
def loadCT(filename):
    """Load a graph from a .ct file.

    Notes
    ------
    The first line of the file is used as the name of the graph, and the
    second line gives the number of nodes and edges. The remaining lines of a
    typical file look like this:

     3 2  <- number of nodes and edges
        0.0000    0.0000    0.0000 C <- each line describes a node; the 4th field is its label, representing a chemical element (the first 3 numbers are presumably coordinates -- TODO confirm)
        0.0000    0.0000    0.0000 C
        0.0000    0.0000    0.0000 O
      1  3  1  1 <- each line describes an edge; the first two numbers are its (1-based) end nodes and the 4th number is its label (the meaning of the 3rd number is not used here -- TODO confirm)
      2  3  1  1

    Return
    ------
    G : NetworkX graph
        Nodes are numbered from 0 and carry a 'label' attribute (str); edges
        carry a 'label' attribute (int).
    """
    # Close the file as soon as it is read instead of leaking the handle.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0])) # set name of the graph

    # split() without argument copes with any amount of leading or repeated
    # whitespace, which fixed-width .ct files commonly contain.
    counts = content[1].split()
    nb_nodes = int(counts[0]) # number of the nodes
    nb_edges = int(counts[1]) # number of the edges

    for i in range(0, nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    # Edge lines start right after the node lines; node indices in the file
    # are 1-based.
    for i in range(0, nb_edges):
        fields = content[i + nb_nodes + 2].split()
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1, label=int(fields[3]))

    return G
def loadGXL(filename):
    """Load a graph from a .gxl file.

    Nodes are renumbered from 0 in document order; the original identifier
    is kept in the 'id' node attribute. The text of the first child of the
    first 'attr' element of each node/edge is stored as its 'label'.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    graph = nx.Graph()
    index_of = {}  # GXL node id -> integer node index

    for position, node in enumerate(root.iter('node')):
        node_label = node.find('attr')[0].text
        gxl_id = node.attrib['id']
        index_of[gxl_id] = position
        graph.add_node(position, id=gxl_id, label=node_label)

    for edge in root.iter('edge'):
        edge_label = edge.find('attr')[0].text
        source = index_of[edge.attrib['from']]
        target = index_of[edge.attrib['to']]
        graph.add_edge(source, target, label=edge_label)

    return graph
def loadDataset(filename):
    """Load the graphs and targets listed in a dataset file.

    Two formats are recognised from the file extension:

    * ``.ds``: each non-blank line holds a .ct file name (a leading '#' is
      dropped) followed by a numeric target value;
    * ``.cxl``: XML file whose 'print' elements give a .gxl file name
      ('file' attribute) and a class ('class' attribute).

    Graph files are looked up relative to the directory of ``filename``.

    Return
    ------
    data, y : list, list
        The loaded graphs and their targets (floats for ``.ds``, class
        strings for ``.cxl``). Both lists are empty for any other extension.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        # Close the file as soon as it is read instead of leaking the handle.
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            tmp = line.split()
            if not tmp:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            # remove the '#'s in file names; join() keeps the path relative
            # when the dataset file itself has no directory component
            data.append(loadCT(join(dirname_dataset, tmp[0].replace('#', '', 1))))
            y.append(float(tmp[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET

        tree = ET.parse(filename)
        root = tree.getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y
@@ -0,0 +1,59 @@ | |||||
import networkx as nx | |||||
import numpy as np | |||||
def getSPLengths(G1):
    """Return the matrix of shortest-path lengths (in number of edges) of G1.

    Entry [i, j] is the length of the path returned by ``nx.shortest_path``
    from i to j; pairs for which no path is returned keep the value 0.
    Node identifiers are used directly as matrix indices.
    """
    all_paths = nx.shortest_path(G1)
    order = G1.number_of_nodes()
    distances = np.zeros((order, order))
    for source, paths_from_source in all_paths.items():
        for target, path in paths_from_source.items():
            # a path lists its nodes, so it has one edge fewer than entries
            distances[source, target] = len(path) - 1
    return distances
def getSPGraph(G):
    """Transform graph G to its corresponding shortest-paths graph.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G.

    Notes
    ------
    For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.
    The transformation itself is delegated to floydTransformation.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    shortest_paths_graph = floydTransformation(G)
    return shortest_paths_graph
def floydTransformation(G):
    """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G. It has the nodes of G
        (with their attributes) and, for every pair of indices (i, j), an
        edge whose 'cost' attribute is the matching entry of the
        Floyd-Warshall distance matrix.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    distance_matrix = nx.floyd_warshall_numpy(G) # @todo weight label not considered

    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))

    # NOTE(review): matrix indices are used as node identifiers, which
    # presumably requires the nodes of G to be 0..n-1 in order -- confirm.
    order = G.number_of_nodes()
    for row in range(0, order):
        for col in range(0, order):
            S.add_edge(row, col, cost = distance_matrix[row, col])
    return S
@@ -0,0 +1,5 @@ | |||||
To use the library :
$> virtualenv --python=/usr/bin/python3.5 venv
$> source venv/bin/activate
$> pip install -r requirements.txt
... Go use pygraph
@@ -0,0 +1,66 @@ | |||||
import ot | |||||
import sys | |||||
import pathlib | |||||
sys.path.insert(0, "../") | |||||
from pygraph.utils.graphfiles import loadDataset | |||||
from pygraph.ged.costfunctions import ConstantCostFunction | |||||
from pygraph.utils.utils import getSPLengths | |||||
from tqdm import tqdm | |||||
import numpy as np | |||||
from scipy.optimize import linear_sum_assignment | |||||
from pygraph.ged.GED import ged | |||||
import scipy | |||||
def pad(C, n):
    """Return an n x n matrix holding C in its top-left corner and zeros elsewhere."""
    rows, cols = C.shape[0], C.shape[1]
    padded = np.zeros((n, n))
    padded[:rows, :cols] = C
    return padded
if (__name__ == "__main__"):
    # Script: for pairs of graphs of the dataset, compare the edit distance
    # obtained from ged()'s default mapping with the one obtained from a
    # mapping derived from a Gromov-Wasserstein computation, then print the
    # means and save both lists of distances.
    # NOTE(review): absolute, machine-specific dataset path.
    ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds"
    dataset, y = loadDataset(ds_filename)
    cf = ConstantCostFunction(1, 3, 1, 3)
    N = len(dataset)

    pairs = list()  # (i, j) index pairs that were actually compared
    ged_distances = list() #np.zeros((N, N))
    gw_distances = list() #np.zeros((N, N))
    for i in tqdm(range(0, N)):
        for j in tqdm(range(i, N)):
            G1 = dataset[i]
            G2 = dataset[j]
            n = G1.number_of_nodes()
            m = G2.number_of_nodes()
            # Only pairs of graphs with the same number of nodes are processed.
            if(n == m):
                # Shortest-path length matrices, scaled by their maximum.
                # NOTE(review): a graph whose maximum path length is 0 would
                # divide by zero here.
                C1 = getSPLengths(G1)
                C2 = getSPLengths(G2)

                C1 /= C1.max()
                C2 /= C2.max()

                dim = max(n, m)
                # NOTE(review): since n == m in this branch, neither padding
                # branch below can run; either the size test above or this
                # padding is a leftover -- confirm the intent.
                if(n < m):
                    C1 = pad(C1, dim)
                elif (m < n):
                    C2 = pad(C2, dim)

                # Uniform weights over the dim nodes of each graph.
                p = ot.unif(dim)
                q = ot.unif(dim)

                gw = ot.gromov_wasserstein(C1, C2, p, q,
                                           'square_loss', epsilon=5e-3)
                # gw is negated so that the assignment maximises it
                # (presumably gw is a node-to-node coupling matrix -- confirm).
                row_ind, col_ind = linear_sum_assignment(-gw)
                # Same (rho, varrho) construction as getOptimalMapping.
                rho = col_ind
                varrho = row_ind[np.argsort(col_ind)]
                pairs.append((i,j))
                gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])

                # Reference value: ged() with its default method and solver.
                ged_distances.append(ged(G1, G2, cf=cf)[0])

    # The printed messages are in French: "Moyenne sur" means "Mean over".
    print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
    print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

    np.save("distances_riesen", ged_distances)
    np.save("distances_gw", gw_distances)
@@ -0,0 +1,16 @@ | |||||
cycler==0.10.0 | |||||
Cython==0.27.3 | |||||
decorator==4.1.2 | |||||
matplotlib==2.1.0 | |||||
networkx==2.0 | |||||
numpy==1.13.3 | |||||
pkg-resources==0.0.0 | |||||
POT==0.4.0 | |||||
pyparsing==2.2.0 | |||||
python-dateutil==2.6.1 | |||||
pytz==2017.3 | |||||
scikit-learn==0.19.1 | |||||
scipy==1.0.0 | |||||
six==1.11.0 | |||||
sklearn==0.0 | |||||
tqdm==4.19.4 |