diff --git a/notebooks/py-graph_test.ipynb b/notebooks/py-graph_test.ipynb new file mode 100644 index 0000000..96e0d0d --- /dev/null +++ b/notebooks/py-graph_test.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import paths\n", + "\n", + "import pygraph\n", + "\n", + "from pygraph.utils.graphfiles import loadDataset\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import networkx as nx\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# We load a ds dataset\n", + "# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n", + "dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n", + "100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n", + "100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n", + "100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n" + ] + } + ], + "source": [ + "#Compute graph edit distances\n", + "\n", + "from tqdm import tqdm\n", + "from pygraph.c_ext.lsape_binders import lsap_solverHG\n", + "from pygraph.ged.costfunctions import ConstantCostFunction\n", + "from pygraph.ged.GED import ged\n", + "import time\n", + "\n", + "cf = ConstantCostFunction(1,3,1,3)\n", + "N=len(dataset)\n", + "\n", + "methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n", + "ged_distances = [ 
np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n", + "\n", + "times = list()\n", + "start = time.clock()\n", + "for i in tqdm(range(0,N)):\n", + " for j in range(0,N):\n", + " ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", + "times.append(time.clock() - start)\n", + "\n", + "\n", + "start = time.clock()\n", + "for i in tqdm(range(0,N)):\n", + " for j in range(0,N):\n", + " ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n", + "\n", + "times.append(time.clock() - start)\n", + "\n", + "start = time.clock()\n", + "for i in tqdm(range(0,N)):\n", + " for j in range(0,N):\n", + " ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n", + "times.append(time.clock() - start)\n", + "\n", + "start = time.clock()\n", + "for i in tqdm(range(0,N)):\n", + " for j in range(0,N):\n", + " ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n", + "times.append(time.clock() - start)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " method \t mean \t mean \t time\n", + " Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n", + " Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n", + " Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n", + " Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n" + ] + } + ], + "source": [ + "print(\" method \\t mean \\t mean \\t time\")\n", + "data = list()\n", + "for i in range(0,len(ged_distances)):\n", + " ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n", + " print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], 
np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + }, + "name": "py-graph_test.ipynb" + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pygraph/__init__.py b/pygraph/__init__.py new file mode 100644 index 0000000..3eafadb --- /dev/null +++ b/pygraph/__init__.py @@ -0,0 +1,21 @@ +# -*-coding:utf-8 -*- +""" +Pygraph + +This package contains 4 sub packages : + * c_ext : binders to C++ code + * ged : allows to compute graph edit distance between networkX graphs + * kernels : computation of graph kernels, ie graph similarity measure compatible with SVM + * notebooks : examples of code using this library + * utils : Diverse computation on graphs +""" + +# info +__version__ = "0.1" +__author__ = "Benoit Gaüzère" +__date__ = "November 2017" + +# import sub modules +from pygraph import c_ext +from pygraph import ged +from pygraph import utils diff --git a/pygraph/c_ext/Makefile b/pygraph/c_ext/Makefile new file mode 100644 index 0000000..71c3eb6 --- /dev/null +++ b/pygraph/c_ext/Makefile @@ -0,0 +1,5 @@ +# You must specify your env variable LSAPE_DIR +#LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/ + +liblsap.so:lsap.cpp + g++ -fPIC -I/home/bgauzere/Téléchargements/lsape/include/ -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR) diff --git a/pygraph/c_ext/README.md b/pygraph/c_ext/README.md new file mode 100644 index 0000000..f1aa549 --- /dev/null +++ b/pygraph/c_ext/README.md @@ -0,0 +1,6 @@ +Python wrapper for lsape method + +Specify your LSAPE_DIR env variable with the 
location of the source +code to compile + +source code : https://bougleux.users.greyc.fr/lsape/ diff --git a/pygraph/c_ext/__init__.py b/pygraph/c_ext/__init__.py new file mode 100644 index 0000000..7101e7e --- /dev/null +++ b/pygraph/c_ext/__init__.py @@ -0,0 +1,17 @@ +# -*-coding:utf-8 -*- +"""Pygraph - c_ext module + +This package binds some C++ code to python + +lsape_binders.py : binders to C++ code of LSAPE methods implemented in +https://bougleux.users.greyc.fr/lsape/ + +""" + +# info +__version__ = "0.1" +__author__ = "Benoit Gaüzère" +__date__ = "November 2017" + +# import sub modules +from pygraph.c_ext import lsape_binders diff --git a/pygraph/c_ext/lsap.cpp b/pygraph/c_ext/lsap.cpp new file mode 100644 index 0000000..712a092 --- /dev/null +++ b/pygraph/c_ext/lsap.cpp @@ -0,0 +1,43 @@ +/* +Python wrapper +*/ + +#include "hungarian-lsape.hh" +#include "hungarian-lsap.hh" + +#include + +extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){ + double * u = new double[nm]; + double * v = new double[nm]; + + int * rho_int = new int[nm]; + int * varrho_int = new int[nm]; + + hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int); + //Find a better way to do + for (int i =0;i= m): + ged += cf.cnd(i, G1) + else: + ged += cf.cns(i, phi_i, G1, G2) + for j in G2.nodes(): + phi_j = varrho[j] + if(phi_j >= n): + ged += cf.cni(j, G2) + + for e in G1.edges(data=True): + i = e[0] + j = e[1] + phi_i = rho[i] + phi_j = rho[j] + if (phi_i < m) and (phi_j < m): + mappedEdge = len(list(filter(lambda x: True if + x == phi_j else False, G2[phi_i]))) + if(mappedEdge): + e2 = [phi_i, phi_j, G2[phi_i][phi_j]] + min_cost = min(cf.ces(e, e2, G1, G2), + cf.ced(e, G1) + cf.cei(e2, G2)) + ged += min_cost + else: + ged += cf.ced(e, G1) + else: + ged += cf.ced(e, G1) + for e in G2.edges(data=True): + i = e[0] + j = e[1] + phi_i = varrho[i] + phi_j = varrho[j] + if (phi_i < n) and (phi_j < n): + mappedEdge = len(list(filter(lambda x: True if x == phi_j + else False, 
def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
    """Solve the assignment problem on C and return the mapping both ways.

    TODO: plug in Seb's C solvers as alternative ``lsap_solver`` values.

    Parameters
    ----------
    C : cost matrix
        Passed unchanged to ``lsap_solver``.
    lsap_solver : callable
        Must return a ``(row_ind, col_ind)`` pair of index arrays.
        Defaults to ``scipy.optimize.linear_sum_assignment``.

    Return
    ------
    (forward, backward) : pair of index arrays
        ``forward`` is the solver's column indices, returned as-is.
        ``backward`` is the solver's row indices reordered by ascending
        column index.
    """
    assigned_rows, assigned_cols = lsap_solver(C)
    # Sorting the rows by their assigned column yields, for each column,
    # the row that was matched to it.
    by_column = np.argsort(assigned_cols)
    backward = assigned_rows[by_column]
    return assigned_cols, backward
class RiesenCostFunction():
    """Node cost function that adds the cost of the incident edges.

    Wraps a base cost function ``cf``. Each node cost is the base node
    cost plus a cost derived from the edges incident to the node(s).
    """

    def __init__(self, cf, lsap_solver=linear_sum_assignment):
        # Base cost function providing cns/cnd/cni/ces/ced/cei.
        self.cf_ = cf
        # Solver used for the local edge-assignment problem in cns.
        self.lsap_solver_ = lsap_solver

    def cns(self, u, v, G1, G2):
        """Return the substitution cost of node u (in G1) by node v (in G2).

        u and v are node identifiers. The incident edges of u and v are
        matched through an assignment problem whose optimal cost is added
        to the base substitution cost.
        """
        incident_u = [[u, nbr, G1[u][nbr]] for nbr in G1[u]]
        incident_v = [[v, nbr, G2[v][nbr]] for nbr in G2[v]]
        n = len(incident_u)
        m = len(incident_v)

        # (n+m) x (n+m) matrix: substitutions top-left, deletions on the
        # top-right diagonal, insertions on the bottom-left diagonal,
        # zeros bottom-right; every other entry is forbidden (inf).
        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        for row, edge_u in enumerate(incident_u):
            for col, edge_v in enumerate(incident_v):
                sub_C[row, col] = self.cf_.ces(edge_u, edge_v, G1, G2)

        for row, edge_u in enumerate(incident_u):
            sub_C[row, m + row] = self.cf_.ced(edge_u, G1)

        for col, edge_v in enumerate(incident_v):
            sub_C[n + col, col] = self.cf_.cei(edge_v, G2)

        rows, cols = self.lsap_solver_(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return self.cf_.cns(u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Return the deletion cost of u plus the deletion of its edges."""
        edges_cost = 0
        for nbr, attrs in G1[u].items():
            edges_cost += self.cf_.ced([u, nbr, attrs], G1)
        return self.cf_.cnd(u, G1) + edges_cost

    def cni(self, v, G2):
        """Return the insertion cost of v plus the insertion of its edges."""
        edges_cost = 0
        for nbr, attrs in G2[v].items():
            edges_cost += self.cf_.cei([v, nbr, attrs], G2)
        return self.cf_.cni(v, G2) + edges_cost
def _count_matching_sp_edges(S1, S2):
    """Count edge pairs (e1 in S1, e2 in S2) with equal non-zero 'cost' and the same endpoints in either order."""
    edges2 = list(S2.edges(data=True))
    count = 0
    for e1 in S1.edges(data=True):
        cost1 = e1[2]['cost']
        if cost1 == 0:
            # zero-cost edges never contribute to the kernel
            continue
        for e2 in edges2:
            same_endpoints = ((e1[0] == e2[0] and e1[1] == e2[1])
                              or (e1[0] == e2[1] and e1[1] == e2[0]))
            if cost1 == e2[2]['cost'] and same_endpoints:
                count += 1
    return count


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated. Each
        graph is first converted with ``getSPGraph``.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated. They are used
        as given (no ``getSPGraph`` conversion is applied in this case),
        so every edge must already carry a 'cost' attribute.
        NOTE(review): presumably callers pass shortest-path graphs here;
        confirm whether the conversion should also happen in this branch.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between 2
        graphs. / SP Kernel between 2 graphs.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]

        Kmatrix = np.zeros((len(Gn), len(Gn)))

        # shortest-path graphs of Gn
        Sn = [getSPGraph(G) for G in Gn]

        start_time = time.time()
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                # the kernel is symmetric: compute once, store twice
                value = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] = value
                Kmatrix[j][i] = value

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    else:  # for only 2 graphs
        G1 = args[0]
        G2 = args[1]

        # start_time was previously only set in the list branch, which made
        # the timing print below raise NameError on every two-graph call.
        start_time = time.time()
        kernel = _count_matching_sp_edges(G1, G2)

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

        return kernel
"Benoit Gaüzère" +__date__ = "November 2017" + +from pygraph.utils import graphfiles +from pygraph.utils import utils diff --git a/pygraph/utils/graphfiles.py b/pygraph/utils/graphfiles.py new file mode 100644 index 0000000..c0ab9a3 --- /dev/null +++ b/pygraph/utils/graphfiles.py @@ -0,0 +1,87 @@ +import networkx as nx + +def loadCT(filename): + """load data from .ct file. + + Notes + ------ + a typical example of data in .ct is like this: + + 3 2 <- number of nodes and edges + 0.0000 0.0000 0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element @Q what are the first 3 numbers? + 0.0000 0.0000 0.0000 C + 0.0000 0.0000 0.0000 O + 1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. @Q what are the 3th numbers? + 2 3 1 1 + """ + content = open(filename).read().splitlines() + G = nx.Graph(name=str(content[0])) # set name of the graph + tmp = content[1].split(" ") + if tmp[0] == '': + nb_nodes = int(tmp[1]) # number of the nodes + nb_edges = int(tmp[2]) # number of the edges + else: + nb_nodes = int(tmp[0]) + nb_edges = int(tmp[1]) + + for i in range(0, nb_nodes): + tmp = content[i + 2].split(" ") + tmp = [x for x in tmp if x != ''] + G.add_node(i, label=tmp[3]) + + for i in range(0, nb_edges): + tmp = content[i + G.number_of_nodes() + 2].split(" ") + tmp = [x for x in tmp if x != ''] + G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3])) + return G + + +def loadGXL(filename): + import networkx as nx + import xml.etree.ElementTree as ET + + tree = ET.parse(filename) + root = tree.getroot() + index = 0 + G = nx.Graph() + dic={} + for node in root.iter('node'): + label = node.find('attr')[0].text + dic[node.attrib['id']] = index + G.add_node(index, id=node.attrib['id'], label=label) + index += 1 + + for edge in root.iter('edge'): + label = edge.find('attr')[0].text + G.add_edge(dic[edge.attrib['from']], 
def loadDataset(filename):
    """Load the graphs listed in a dataset index file, with their targets.

    Parameters
    ----------
    filename : str
        Path of the index file. The extension selects the format:
        ``.ds`` is a text file with one ``<graph file> <value>`` pair per
        line, each graph being read with ``loadCT``; ``.cxl`` is an XML
        file whose ``print`` elements carry ``file`` and ``class``
        attributes, each graph being read with ``loadGXL``.
        Graph files are resolved relative to the index file's directory.

    Return
    ------
    data, y : list, list
        The loaded graphs and their targets (floats for ``.ds``, the raw
        ``class`` attribute strings for ``.cxl``). Both lists are empty
        for any other extension.
    """
    import os
    import xml.etree.ElementTree as ET

    dirname_dataset = os.path.dirname(filename)
    extension = os.path.splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        # close the index file as soon as it has been read
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline)
                continue
            tmp = line.split(' ')
            # remove the first '#' in the file name
            graph_name = tmp[0].replace('#', '', 1)
            # os.path.join keeps the path relative when the index file has
            # no directory part (plain concatenation produced '/<name>')
            data.append(loadCT(os.path.join(dirname_dataset, graph_name)))
            y.append(float(tmp[1]))
    elif extension == "cxl":
        tree = ET.parse(filename)
        root = tree.getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(os.path.join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y
def floydTransformation(G):
    """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be transformed.

    Return
    ------
    S : NetworkX graph
        The shortest-paths graph corresponding to G. It has the same
        nodes (with their data) as G, and one edge per pair of nodes
        whose 'cost' attribute is the corresponding entry of the
        Floyd-Warshall distance matrix.
        NOTE(review): an edge is added for every (i, j) pair, including
        i == j and pairs whose matrix entry is infinite; confirm whether
        such pairs should be skipped.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    # Fix the row/column order explicitly so that matrix positions can be
    # mapped back to the real node identifiers. Using the positions as
    # identifiers only worked for nodes named 0..n-1 in insertion order.
    nodelist = list(G.nodes())
    spMatrix = nx.floyd_warshall_numpy(G, nodelist=nodelist)  # @todo weight label not considered
    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))
    for i, u in enumerate(nodelist):
        for j, v in enumerate(nodelist):
            S.add_edge(u, v, cost=spMatrix[i, j])
    return S
def pad(C, n):
    """Return an n x n zero matrix with C copied into its top-left corner.

    Parameters
    ----------
    C : 2-D numpy array
        Matrix to embed; it is not modified.
    n : int
        Side length of the returned square matrix.
    """
    n_rows = C.shape[0]
    n_cols = C.shape[1]
    padded = np.zeros((n, n))
    padded[:n_rows, :n_cols] = C
    return padded
+cycler==0.10.0 +Cython==0.27.3 +decorator==4.1.2 +matplotlib==2.1.0 +networkx==2.0 +numpy==1.13.3 +pkg-resources==0.0.0 +POT==0.4.0 +pyparsing==2.2.0 +python-dateutil==2.6.1 +pytz==2017.3 +scikit-learn==0.19.1 +scipy==1.0.0 +six==1.11.0 +sklearn==0.0 +tqdm==4.19.4