@@ -1,21 +0,0 @@ | |||
# -*-coding:utf-8 -*- | |||
""" | |||
Pygraph | |||
This package contains 5 sub packages:
* c_ext : binders to C++ code | |||
* ged : allows to compute graph edit distance between networkX graphs | |||
* kernels : computation of graph kernels, ie graph similarity measure compatible with SVM | |||
* notebooks : examples of code using this library | |||
* utils : Diverse computation on graphs | |||
""" | |||
# info | |||
__version__ = "0.1" | |||
__author__ = "Benoit Gaüzère" | |||
__date__ = "November 2017" | |||
# import sub modules | |||
from pygraph import c_ext | |||
from pygraph import ged | |||
from pygraph import utils |
@@ -1,116 +0,0 @@ | |||
import sys | |||
import pathlib | |||
sys.path.insert(0, "../") | |||
import networkx as nx | |||
import numpy as np | |||
import time | |||
def marginalizedkernel(*args):
    """Calculate marginalized graph kernels between graphs.

    The function can be called in two ways:

    * ``marginalizedkernel(Gn, p_quit, itr)`` builds the full kernel matrix
      of a list of graphs;
    * ``marginalizedkernel(G1, G2, p_quit, itr)`` computes the kernel
      between two graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    p_quit : float
        The termination probability in the random walks generating step.
    itr : integer
        Number of iterations used to calculate R_inf (the update loop runs
        ``itr - 1`` times).

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/float
        Kernel matrix, each element of which is the marginalized kernel
        between 2 graphs. / Marginalized kernel between 2 graphs.

    Notes
    -----
    Node identifiers are used directly as row/column indices of R_inf, so
    the nodes of each graph must be the integers ``0 .. n-1``. Every node
    and every edge must carry a ``'label'`` attribute.

    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
    labeled graphs. In Proceedings of the 20th International Conference on
    Machine Learning, Washington, DC, United States, 2003.
    """
    if len(args) == 3:  # for a list of graphs
        Gn, p_quit, itr = args
        nb_graphs = len(Gn)
        Kmatrix = np.zeros((nb_graphs, nb_graphs))

        start_time = time.time()
        for i in range(nb_graphs):
            for j in range(i, nb_graphs):
                # the kernel is symmetric: compute the upper triangle only
                Kmatrix[i][j] = marginalizedkernel(Gn[i], Gn[j], p_quit, itr)
                Kmatrix[j][i] = Kmatrix[i][j]
        print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    # for only 2 graphs
    G1 = args[0]
    G2 = args[1]
    p_quit = args[2]  # termination probability of the random walks
    itr = args[3]  # number of iterations to calculate R_inf

    num_nodes_G1 = nx.number_of_nodes(G1)
    num_nodes_G2 = nx.number_of_nodes(G2)
    if num_nodes_G1 == 0 or num_nodes_G2 == 0:
        # The kernel is a sum over pairs of nodes: with no pair it is 0
        # (this also avoids dividing by zero below).
        return 0

    # initial probability distribution of the random walks (uniform over |G|)
    p_init_G1 = 1 / num_nodes_G1
    p_init_G2 = 1 / num_nodes_G2

    r1 = p_quit * p_quit

    # matrix saving R_inf for all pairs of nodes
    R_inf = np.zeros([num_nodes_G1, num_nodes_G2])

    # calculate R_inf with a simple iterative method
    for _ in range(1, itr):
        R_inf_new = np.full((num_nodes_G1, num_nodes_G2), r1, dtype=float)

        for node1 in G1.nodes():
            neighbor_n1 = G1[node1]
            if not neighbor_n1:
                # isolated node: no walk continues from it, nothing to add
                continue
            # transition probability (uniform over the adjacent vertices)
            p_trans_n1 = (1 - p_quit) / len(neighbor_n1)

            for node2 in G2.nodes():
                neighbor_n2 = G2[node2]
                if not neighbor_n2:
                    continue
                p_trans_n2 = (1 - p_quit) / len(neighbor_n2)

                for neighbor1 in neighbor_n1:
                    for neighbor2 in neighbor_n2:
                        t = p_trans_n1 * p_trans_n2 * \
                            deltaKernel(G1.nodes[neighbor1]['label'] == G2.nodes[neighbor2]['label']) * \
                            deltaKernel(neighbor_n1[neighbor1]['label'] == neighbor_n2[neighbor2]['label'])
                        R_inf_new[node1][node2] += t * R_inf[neighbor1][neighbor2]  # ref [1] equation (8)

        R_inf[:] = R_inf_new

    # add elements of R_inf up and calculate kernel
    kernel = 0
    for node1, attrs1 in G1.nodes(data=True):
        for node2, attrs2 in G2.nodes(data=True):
            s = p_init_G1 * p_init_G2 * deltaKernel(attrs1['label'] == attrs2['label'])
            kernel += s * R_inf[node1][node2]  # ref [1] equation (6)

    return kernel
def deltaKernel(condition):
    """Evaluate the delta kernel of a condition.

    Parameters
    ----------
    condition : Boolean
        The condition deciding whether the kernel equals 1 or 0.

    Return
    ------
    Kernel : integer
        1 when the condition is truthy, 0 otherwise.

    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
    labeled graphs. In Proceedings of the 20th International Conference on
    Machine Learning, Washington, DC, United States, 2003.
    """
    if condition:
        return 1
    return 0
@@ -1,68 +0,0 @@ | |||
import sys | |||
import pathlib | |||
sys.path.insert(0, "../") | |||
import networkx as nx | |||
import numpy as np | |||
import time | |||
from utils.utils import getSPGraph | |||
def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated. Each graph
        is first transformed with ``getSPGraph``.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated. Their edges are
        read as they are and must carry a ``'cost'`` attribute.
        NOTE(review): unlike the list form, this form does not call
        ``getSPGraph`` - presumably shortest-path graphs are expected here;
        confirm against callers.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2
        graphs. / SP kernel between 2 graphs.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
    Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp).
    IEEE.
    """
    def count_matching_edges(S1, S2):
        # Count the pairs of edges that have the same non-zero cost and the
        # same end points (in either orientation).
        matches = 0
        for u1, v1, data1 in S1.edges(data=True):
            cost1 = data1['cost']
            if cost1 == 0:
                continue
            for u2, v2, data2 in S2.edges(data=True):
                same_ends = (u1 == u2 and v1 == v2) or (u1 == v2 and v1 == u2)
                if cost1 == data2['cost'] and same_ends:
                    matches += 1
        return matches

    if len(args) == 1:  # for a list of graphs
        Gn = args[0]
        nb_graphs = len(Gn)
        Kmatrix = np.zeros((nb_graphs, nb_graphs))

        Sn = [getSPGraph(G) for G in Gn]  # shortest path graphs of Gn

        start_time = time.time()
        for i in range(nb_graphs):
            for j in range(i, nb_graphs):
                # the kernel is symmetric: fill both triangles at once
                Kmatrix[i][j] = count_matching_edges(Sn[i], Sn[j])
                Kmatrix[j][i] = Kmatrix[i][j]
        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    # for only 2 graphs
    G1 = args[0]
    G2 = args[1]

    # the timer has to be started here: it is reported just below
    start_time = time.time()
    kernel = count_matching_edges(G1, G2)
    print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

    return kernel
@@ -1,68 +0,0 @@ | |||
import sys | |||
import pathlib | |||
sys.path.insert(0, "../") | |||
import networkx as nx | |||
import numpy as np | |||
import time | |||
from utils.utils import getSPGraph | |||
def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated. Each graph
        is first transformed with ``getSPGraph``.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated. Their edges are
        read as they are and must carry a ``'cost'`` attribute.
        NOTE(review): unlike the list form, this form does not call
        ``getSPGraph`` - presumably shortest-path graphs are expected here;
        confirm against callers.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2
        graphs. / SP kernel between 2 graphs.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
    Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp).
    IEEE.
    """
    def count_matching_edges(S1, S2):
        # Count the pairs of edges that have the same non-zero cost and the
        # same end points (in either orientation).
        matches = 0
        for u1, v1, data1 in S1.edges(data=True):
            cost1 = data1['cost']
            if cost1 == 0:
                continue
            for u2, v2, data2 in S2.edges(data=True):
                same_ends = (u1 == u2 and v1 == v2) or (u1 == v2 and v1 == u2)
                if cost1 == data2['cost'] and same_ends:
                    matches += 1
        return matches

    if len(args) == 1:  # for a list of graphs
        Gn = args[0]
        nb_graphs = len(Gn)
        Kmatrix = np.zeros((nb_graphs, nb_graphs))

        Sn = [getSPGraph(G) for G in Gn]  # shortest path graphs of Gn

        start_time = time.time()
        for i in range(nb_graphs):
            for j in range(i, nb_graphs):
                # the kernel is symmetric: fill both triangles at once
                Kmatrix[i][j] = count_matching_edges(Sn[i], Sn[j])
                Kmatrix[j][i] = Kmatrix[i][j]
        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    # for only 2 graphs
    G1 = args[0]
    G2 = args[1]

    # the timer has to be started here: it is reported just below
    start_time = time.time()
    kernel = count_matching_edges(G1, G2)
    print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

    return kernel
@@ -1,17 +0,0 @@ | |||
# -*-coding:utf-8 -*- | |||
"""Pygraph - utils module | |||
Implement some methods to manage graphs | |||
graphfiles.py : load .gxl and .ct files | |||
utils.py : compute some properties on networkX graphs | |||
""" | |||
# info | |||
__version__ = "0.1" | |||
__author__ = "Benoit Gaüzère" | |||
__date__ = "November 2017" | |||
from utils import graphfiles | |||
from utils import utils |
@@ -1,87 +0,0 @@ | |||
import networkx as nx | |||
def loadCT(filename):
    """Load a graph from a .ct file.

    Parameters
    ----------
    filename : str
        Path of the .ct file.

    Return
    ------
    G : NetworkX graph
        Graph whose nodes are numbered from 0 in file order and carry a
        ``label`` attribute; edges carry an integer ``label`` attribute.
        The first line of the file is stored as the graph name.

    Notes
    ------
    The first line of the file is used as the graph name. After it, a
    typical example of data in .ct is like this:

     3 2  <- number of nodes and edges
        0.0000    0.0000    0.0000 C  <- each line describes a node, the
                                         last field is its label, a
                                         chemical element (the first 3
                                         numbers are presumably
                                         coordinates - TODO confirm)
        0.0000    0.0000    0.0000 C
        0.0000    0.0000    0.0000 O
      1  3  1  1  <- each line describes an edge: the first two numbers
                     are its nodes (1-based), the last one is its label
                     (the 3rd number is not used here)
      2  3  1  1
    """
    # read everything at once and release the file handle immediately
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))  # set name of the graph

    # split() without argument ignores leading and repeated blanks
    counts = content[1].split()
    nb_nodes = int(counts[0])  # number of the nodes
    nb_edges = int(counts[1])  # number of the edges

    for i in range(nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    for i in range(nb_edges):
        fields = content[i + nb_nodes + 2].split()
        # node numbers are 1-based in the file, 0-based in the graph
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1, label=int(fields[3]))

    return G
def loadGXL(filename):
    """Load a graph from a .gxl file.

    Nodes are numbered from 0 in document order; the original GXL
    identifier is kept in the ``id`` node attribute. The label of a node
    or an edge is the text of the first child of its first ``attr``
    element.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()
    index_of = {}  # GXL node id -> integer index used in G

    for position, node in enumerate(root.iter('node')):
        node_label = node.find('attr')[0].text
        gxl_id = node.attrib['id']
        index_of[gxl_id] = position
        G.add_node(position, id=gxl_id, label=node_label)

    for edge in root.iter('edge'):
        edge_label = edge.find('attr')[0].text
        source = index_of[edge.attrib['from']]
        target = index_of[edge.attrib['to']]
        G.add_edge(source, target, label=edge_label)

    return G
def loadDataset(filename):
    """Load the graphs listed in a dataset file.

    Parameters
    ----------
    filename : str
        Path of the dataset file. Two formats are recognised from the
        extension:

        * ``.ds``: one ``<ct file> <target>`` pair per line, the graphs
          being read with ``loadCT``;
        * ``.cxl``: XML file whose ``print`` elements give a ``file``
          (read with ``loadGXL``) and a ``class``.

        Graph files are looked up relative to the directory of `filename`.

    Return
    ------
    data : list
        The loaded graphs.
    y : list
        The target of each graph: floats for ``.ds``, class strings for
        ``.cxl``. Both lists are empty for any other extension.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []

    if extension == "ds":
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            if not line.strip():
                continue  # tolerate blank lines (e.g. at the end of file)
            tmp = line.split(' ')
            # remove the '#' in file names; join() keeps the path relative
            # when the dataset file sits in the current directory
            data.append(loadCT(join(dirname_dataset, tmp[0].replace('#', '', 1))))
            y.append(float(tmp[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET

        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y
@@ -1,59 +0,0 @@ | |||
import networkx as nx | |||
import numpy as np | |||
def getSPLengths(G1):
    """Return the matrix of shortest path lengths of a graph.

    Parameters
    ----------
    G1 : NetworkX graph
        The graph; its node identifiers are used directly as matrix
        indices, so they must be the integers ``0 .. n-1``.

    Return
    ------
    distances : Numpy array
        ``distances[i, j]`` is the number of edges of a shortest path from
        node i to node j. Entries of pairs without a path are left at 0.
    """
    # shortest paths are stored in `sp` (not in the numpy module `np`)
    sp = dict(nx.shortest_path(G1))
    nb_nodes = G1.number_of_nodes()
    distances = np.zeros((nb_nodes, nb_nodes))
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # a path lists its nodes: its length is one less
            distances[i, j] = len(path) - 1
    return distances
def getSPGraph(G):
    """Build the shortest-paths graph of G.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G.

    Notes
    ------
    For an input graph G, its corresponding shortest-paths graph S contains
    the same set of nodes as G, while there exists an edge between all
    nodes in S which are connected by a walk in G. Every edge in S between
    two nodes is labeled by the shortest distance between these two nodes.
    The transformation itself is delegated to ``floydTransformation``.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
    Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp).
    IEEE.
    """
    S = floydTransformation(G)
    return S
def floydTransformation(G):
    """Transform graph G to its corresponding shortest-paths graph using
    Floyd-transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G. It has the nodes of G
        (with their attributes) and one edge per pair of nodes, whose
        ``cost`` attribute is the shortest distance between them.

    Notes
    -----
    An edge is added for every pair of matrix entries, so S also contains
    self-loops (cost 0) and, presumably, edges of infinite cost between
    nodes that are not connected in G - TODO confirm this is intended.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
    Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp).
    IEEE.
    """
    # Fix the node order explicitly so that row/column k of the matrix is
    # known to be nodes[k], whatever the node identifiers are.
    nodes = list(G.nodes())
    spMatrix = nx.floyd_warshall_numpy(G, nodelist=nodes)  # @todo weight label not considered

    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))
    for i, source in enumerate(nodes):
        for j, target in enumerate(nodes):
            S.add_edge(source, target, cost=spMatrix[i, j])
    return S
@@ -1,5 +0,0 @@ | |||
# Build the shared library wrapping the LSAPE solvers.
#
# LSAPE_DIR must point to the LSAPE "include" directory. Set it in the
# environment or on the command line, e.g.:
#     make LSAPE_DIR=/path/to/lsape/include/
# (no developer-specific include path is hard-coded any more).

liblsap.so: lsap.cpp
	g++ -fPIC -O3 -shared -I$(LSAPE_DIR) lsap.cpp -o liblsap.so
@@ -1,6 +0,0 @@ | |||
Python wrapper for the LSAPE methods.
Before compiling, set the LSAPE_DIR environment variable to the location
of the LSAPE source code.
Source code: https://bougleux.users.greyc.fr/lsape/
@@ -1,17 +0,0 @@ | |||
# -*-coding:utf-8 -*- | |||
"""Pygraph - c_ext module | |||
This package binds some C++ code to python | |||
lsape_binders.py : binders to C++ code of LSAPE methods implemented in | |||
https://bougleux.users.greyc.fr/lsape/ | |||
""" | |||
# info | |||
__version__ = "0.1" | |||
__author__ = "Benoit Gaüzère" | |||
__date__ = "November 2017" | |||
# import sub modules | |||
from pygraph.c_ext import lsape_binders |
@@ -1,43 +0,0 @@ | |||
/* | |||
Python wrapper | |||
*/ | |||
#include "hungarian-lsape.hh" | |||
#include "hungarian-lsap.hh" | |||
#include <cstdio> | |||
// Solve a square LSAP of size nm x nm with hungarianLSAP and copy the
// resulting assignments into the caller-provided `long` arrays.
//   C      : cost matrix, passed through to hungarianLSAP unchanged
//   nm     : number of rows (and columns)
//   rho    : output array of nm elements
//   varrho : output array of nm elements
// Always returns 0.
extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){
  double * u = new double[nm];
  double * v = new double[nm];

  int * rho_int = new int[nm];
  int * varrho_int = new int[nm];

  hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int);
  // The solver works on int arrays while the caller provides long arrays:
  // convert element by element. (Find a better way to do this.)
  for (int i = 0; i < nm; i++){
    rho[i] = (long)(rho_int[i]);
    varrho[i] = (long)(varrho_int[i]);
  }

  // release the temporary buffers (they were leaked on every call)
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}
// Solve an LSAPE problem with hungarianLSAPE and copy the resulting
// assignments into the caller-provided `long` arrays.
//   C      : cost matrix, passed through to hungarianLSAPE unchanged
//   n, m   : sizes passed to hungarianLSAPE
//   rho    : output array of n elements
//   varrho : output array of m elements
// The declared return type is kept for ABI compatibility; the function
// always returns a null pointer.
extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){
  double * u = new double[n];
  double * v = new double[m];

  int * rho_int = new int[n];
  int * varrho_int = new int[m];

  hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v);
  // convert the int results of the solver to the caller's long arrays
  for (int i = 0; i < n; i++)
    rho[i] = (long)(rho_int[i]);

  for (int i = 0; i < m; i++)
    varrho[i] = (long)(varrho_int[i]);

  // release the temporary buffers (they were leaked on every call)
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}
@@ -1,23 +0,0 @@ | |||
import numpy as np | |||
import ctypes as c | |||
from ctypes import cdll | |||
import os.path | |||
def lsap_solverHG(C):
    """Solve an LSAP with the Hungarian solver of the LSAPE C++ library.

    The shared library ``liblsap.so`` is loaded from the directory of this
    module on every call.

    Parameters
    ----------
    C : Numpy array
        Cost matrix; its first dimension gives the problem size.
        Infinite entries are replaced by 10000 IN PLACE before the call.
        NOTE(review): presumably expected to be a square, C-contiguous
        float64 array - confirm against callers.

    Return
    ------
    row_ind, col_ind : Numpy arrays
        ``row_ind`` is ``0 .. nm-1`` and ``col_ind`` is built from the
        ``varrho`` output of the library.

    Notes
    -----
    NOTE(review): ``C.transpose()`` is a view, so the address handed to
    the library is that of C's own buffer - confirm which memory layout
    the C++ side expects before changing this line.
    """
    size = C.shape[0]

    library_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'liblsap.so'))
    solver_lib = cdll.LoadLibrary(library_path)
    solver_lib.lsap.restype = c.c_int

    # output buffers filled by the library
    rho = np.zeros((size, 1), int)
    varrho = np.zeros((size, 1), int)

    # the solver is given a large finite cost instead of infinity
    C[C == np.inf] = 10000

    cost_ptr = c.c_void_p(C.transpose().ctypes.data)
    rho_ptr = c.c_void_p(rho.ctypes.data)
    varrho_ptr = c.c_void_p(varrho.ctypes.data)
    solver_lib.lsap(cost_ptr, c.c_int(size), rho_ptr, varrho_ptr)

    row_ind = np.array(range(0, size))
    col_ind = np.array([c.c_int(i).value for i in varrho])
    return row_ind, col_ind
@@ -1,72 +0,0 @@ | |||
from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction | |||
from pygraph.ged.costfunctions import NeighboorhoodCostFunction | |||
from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||
from scipy.optimize import linear_sum_assignment | |||
def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=ConstantCostFunction(1, 3, 1, 3),
        solver=linear_sum_assignment):
    """Compute the Graph Edit Distance between G1 and G2 according to the
    mapping encoded within rho and varrho.

    The nodes of each graph must be identified by the integer index that
    is used in rho and varrho.
    NB: use dictionaries to be more versatile?

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        The graphs to compare.
    method : str
        Cost matrix used when the mapping has to be computed: 'Riesen',
        'Neighboorhood' or 'Basic' (`cf` itself).
    rho, varrho : sequences or None
        ``rho[i]`` is the image of node i of G1 (a value >= |G2| means
        that i is deleted); ``varrho[j]`` is the pre-image of node j of G2
        (a value >= |G1| means that j is inserted). When either is None,
        both are computed from the bipartite cost matrix with `solver`.
    cf : cost function
        Object providing the cns/cnd/cni/ces/ced/cei edit costs.
    solver : callable
        LSAP solver with the interface of
        ``scipy.optimize.linear_sum_assignment``.

    Return
    ------
    ged, rho, varrho
        The edit cost of the mapping and the mapping itself.

    Raises
    ------
    NameError
        If `method` is unknown and the mapping has to be computed.
    """
    if (rho is None) or (varrho is None):
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf, lsap_solver=solver)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf, lsap_solver=solver)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    cost = 0

    # nodes of G1: deleted or substituted
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            cost += cf.cnd(i, G1)
        else:
            cost += cf.cns(i, phi_i, G1, G2)

    # nodes of G2 without pre-image: inserted
    for j in G2.nodes():
        if varrho[j] >= n:
            cost += cf.cni(j, G2)

    # edges of G1: substituted when their image exists in G2, else deleted
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if (phi_i < m) and (phi_j < m) and (phi_j in G2[phi_i]):
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # a substitution never costs more than deletion + insertion
            cost += min(cf.ces(e, e2, G1, G2),
                        cf.ced(e, G1) + cf.cei(e2, G2))
        else:
            cost += cf.ced(e, G1)

    # edges of G2 without pre-image in G1: inserted. The cost of an
    # insertion is cei in both cases (the unmapped-endpoint case used ced,
    # which differs as soon as the cost function is not symmetric).
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        has_preimage = (phi_i < n) and (phi_j < n) and (phi_j in G1[phi_i])
        if not has_preimage:
            cost += cf.cei(e, G2)

    return cost, rho, varrho
@@ -1,17 +0,0 @@ | |||
# -*-coding:utf-8 -*- | |||
"""Pygraph - ged module | |||
Implement some methods to compute ged between graphs | |||
""" | |||
# info | |||
__version__ = "0.1" | |||
__author__ = "Benoit Gaüzère" | |||
__date__ = "November 2017" | |||
from pygraph.ged import costfunctions | |||
from pygraph.ged import bipartiteGED | |||
from pygraph.ged import GED | |||
@@ -1,33 +0,0 @@ | |||
import numpy as np | |||
from scipy.optimize import linear_sum_assignment | |||
from pygraph.ged.costfunctions import ConstantCostFunction | |||
def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
    """Build the (n+m) x (n+m) bipartite cost matrix of G1 and G2.

    With n = |G1| and m = |G2|, the matrix holds the substitution costs in
    its top-left n x m block, the deletion cost of node v of G1 at
    ``[v, m + v]``, the insertion cost of node v of G2 at ``[n + v, v]``,
    zeros in the bottom-right block and infinity elsewhere. Node
    identifiers are used directly as indices.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    size = n + m

    # everything is forbidden (inf) until a cost is written
    C = np.full((size, size), np.inf)
    C[n:, m:] = 0

    # substitutions
    for u in G1.nodes():
        for v in G2.nodes():
            C[u, v] = cf.cns(u, v, G1, G2)

    # deletions of the nodes of G1
    for u in G1.nodes():
        C[u, m + u] = cf.cnd(u, G1)

    # insertions of the nodes of G2
    for v in G2.nodes():
        C[n + v, v] = cf.cni(v, G2)

    return C
def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
    """Compute an optimal linear mapping according to cost matrix C.

    Returns the column assigned to each row returned by the solver,
    followed by the solver's row indices reordered by assigned column.

    TODO: include Seb's C programs.
    """
    rows, cols = lsap_solver(C)
    rows_by_column = rows[np.argsort(cols)]
    return cols, rows_by_column
@@ -1,138 +0,0 @@ | |||
import numpy as np | |||
from scipy.optimize import linear_sum_assignment | |||
class ConstantCostFunction:
    """Define a symmetric constant cost function for edit operations.

    Insertions and deletions share the same cost, for nodes (`cni`) as
    well as for edges (`cei`). A substitution costs `cns` (nodes) or `ces`
    (edges) when the two ``'label'`` attributes differ, and 0 otherwise.
    """

    def __init__(self, cns, cni, ces, cei):
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    def cns(self, node_u, node_v, g1, g2):
        """Return the substitution cost between node_u of g1 and node_v
        of g2."""
        # `G.nodes[...]` replaces `G.node[...]`, removed in NetworkX 2.4
        return (g1.nodes[node_u]['label'] != g2.nodes[node_v]['label']) * self.cns_

    def cnd(self, u, G1):
        """Return the deletion cost of node u of G1."""
        return self.cnd_

    def cni(self, v, G2):
        """Return the insertion cost of node v of G2."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Return the substitution cost between edges e1 and e2, given as
        (u, v, attributes) triples.

        TODO: test with attributes other than symbolic ones, by testing
        the __eq__ operator.
        """
        return (e1[2]['label'] != e2[2]['label']) * self.ces_

    def ced(self, e1, G1):
        """Return the deletion cost of edge e1 of G1."""
        return self.ced_

    def cei(self, e2, G2):
        """Return the insertion cost of edge e2 of G2."""
        return self.cei_
class RiesenCostFunction():
    """Node cost function used to build a cost matrix between nodes for
    LSAP: each node cost also accounts for the edges incident to the
    node."""

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Return the cost of substituting u by v (u and v are node ids).

        It is the substitution cost of the nodes plus the cost of an
        optimal assignment between the edges incident to u and to v.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n, m = len(nbrs_u), len(nbrs_v)

        # local LSAP between the incident edges of u and v
        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        # edge substitutions
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for j, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)

        # edge deletions
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m + i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)

        # edge insertions
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n + j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = self.lsap_solver_(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Return the cost of deleting u together with its incident
        edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Return the cost of inserting v together with its incident
        edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost
class NeighboorhoodCostFunction():
    """Node cost function used to build a cost matrix between nodes for
    LSAP: each node cost also accounts for the incident edges and for the
    neighbours they lead to."""

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        self.cf_ = cf
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Return the cost of substituting u by v (u and v are node ids).

        It is the substitution cost of the nodes plus the cost of an
        optimal assignment between the (edge, neighbour) pairs around u
        and around v.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n, m = len(nbrs_u), len(nbrs_v)

        # local LSAP between the neighbourhoods of u and v
        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        # substitutions: edge cost plus neighbour cost
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for j, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
                sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2)

        # deletions: edge cost plus neighbour cost
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m + i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
            sub_C[i, m + i] += self.cf_.cnd(nbr_u, G1)

        # insertions: edge cost plus neighbour cost
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n + j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
            sub_C[n + j, j] += self.cf_.cni(nbr_v, G2)

        rows, cols = self.lsap_solver_(sub_C)
        neighbourhood_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + neighbourhood_cost

    def cnd(self, u, G1):
        """Return the cost of deleting u together with its incident
        edges."""
        edges_cost = 0
        for nbr in G1[u]:
            edges_cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Return the cost of inserting v together with its incident
        edges."""
        edges_cost = 0
        for nbr in G2[v]:
            edges_cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
        return self.cf_.cni(v, G2) + edges_cost
@@ -13,5 +13,5 @@ __version__ = "0.1" | |||
__author__ = "Benoit Gaüzère" | |||
__date__ = "November 2017" | |||
from pygraph.utils import graphfiles | |||
from pygraph.utils import utils | |||
from utils import graphfiles | |||
from utils import utils |
@@ -5,8 +5,8 @@ import numpy as np | |||
def getSPLengths(G1):
    """Return the matrix of shortest path lengths of a graph.

    Parameters
    ----------
    G1 : NetworkX graph
        The graph; its node identifiers are used directly as matrix
        indices, so they must be the integers ``0 .. n-1``.

    Return
    ------
    distances : Numpy array
        ``distances[i, j]`` is the number of edges of a shortest path from
        node i to node j. Entries of pairs without a path are left at 0.
    """
    # shortest paths are stored in `sp` (not in the numpy module `np`)
    sp = dict(nx.shortest_path(G1))
    nb_nodes = G1.number_of_nodes()
    distances = np.zeros((nb_nodes, nb_nodes))
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # a path lists its nodes: its length is one less
            distances[i, j] = len(path) - 1
    return distances
@@ -1,5 +0,0 @@ | |||
To use the library:
$> virtualenv --python=/usr/bin/python3.5 venv
$> source venv/bin/activate
$> pip install -r requirements.txt
... then start using pygraph
@@ -1,66 +0,0 @@ | |||
import ot | |||
import sys | |||
import pathlib | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.ged.costfunctions import ConstantCostFunction | |||
from pygraph.utils.utils import getSPLengths | |||
from tqdm import tqdm | |||
import numpy as np | |||
from scipy.optimize import linear_sum_assignment | |||
from pygraph.ged.GED import ged | |||
import scipy | |||
def pad(C, n):
    """Return an n x n matrix holding C in its top-left corner and zeros
    everywhere else."""
    nb_rows, nb_cols = C.shape[0], C.shape[1]
    padded = np.zeros((n, n))
    padded[:nb_rows, :nb_cols] = C
    return padded
if __name__ == "__main__":
    # Compare, on every pair of graphs of equal size, the edit cost of the
    # mapping derived from a Gromov-Wasserstein transport plan with the
    # edit cost of the default mapping computed by ged().
    ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds"
    dataset, y = loadDataset(ds_filename)
    cf = ConstantCostFunction(1, 3, 1, 3)
    N = len(dataset)

    pairs = []
    ged_distances = []
    gw_distances = []
    for i in tqdm(range(0, N)):
        for j in tqdm(range(i, N)):
            G1, G2 = dataset[i], dataset[j]
            n = G1.number_of_nodes()
            m = G2.number_of_nodes()
            if n != m:
                # only graphs with the same number of nodes are compared
                continue

            # shortest path length matrices, normalised by their maximum
            C1 = getSPLengths(G1)
            C2 = getSPLengths(G2)
            C1 /= C1.max()
            C2 /= C2.max()

            # padding of the smaller matrix; neither branch is taken
            # while the n == m guard above is in place
            dim = max(n, m)
            if n < m:
                C1 = pad(C1, dim)
            elif m < n:
                C2 = pad(C2, dim)

            # uniform weights on both sides
            p = ot.unif(dim)
            q = ot.unif(dim)

            gw = ot.gromov_wasserstein(C1, C2, p, q,
                                       'square_loss', epsilon=5e-3)

            # turn the transport plan into a node mapping: maximise the
            # transported mass, hence the minus sign
            row_ind, col_ind = linear_sum_assignment(-gw)
            rho = col_ind
            varrho = row_ind[np.argsort(col_ind)]

            pairs.append((i, j))
            gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0])
            ged_distances.append(ged(G1, G2, cf=cf)[0])

    print("Moyenne sur Riesen : {}".format(np.mean(ged_distances)))
    print("Moyenne sur GW : {} ".format(np.mean(gw_distances)))

    np.save("distances_riesen", ged_distances)
    np.save("distances_gw", gw_distances)
@@ -1,16 +0,0 @@ | |||
cycler==0.10.0 | |||
Cython==0.27.3 | |||
decorator==4.1.2 | |||
matplotlib==2.1.0 | |||
networkx==2.0 | |||
numpy==1.13.3 | |||
pkg-resources==0.0.0 | |||
POT==0.4.0 | |||
pyparsing==2.2.0 | |||
python-dateutil==2.6.1 | |||
pytz==2017.3 | |||
scikit-learn==0.19.1 | |||
scipy==1.0.0 | |||
six==1.11.0 | |||
sklearn==0.0 | |||
tqdm==4.19.4 |