From 1f2d94d58cd34cff5590558595d04932ea07c249 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Tue, 5 Dec 2017 17:51:56 +0100 Subject: [PATCH] initial --- py-graph/__init__.py | 21 ----- py-graph/kernels/marginalizedKernel.py | 116 --------------------------- py-graph/kernels/pathKernel.py | 68 ---------------- py-graph/kernels/spkernel.py | 68 ---------------- py-graph/utils/__init__.py | 17 ---- py-graph/utils/graphfiles.py | 87 --------------------- py-graph/utils/utils.py | 59 -------------- pygraph/c_ext/Makefile | 5 -- pygraph/c_ext/README.md | 6 -- pygraph/c_ext/__init__.py | 17 ---- pygraph/c_ext/lsap.cpp | 43 ---------- pygraph/c_ext/lsape_binders.py | 23 ------ pygraph/ged/GED.py | 72 ----------------- pygraph/ged/__init__.py | 17 ---- pygraph/ged/bipartiteGED.py | 33 -------- pygraph/ged/costfunctions.py | 138 --------------------------------- pygraph/kernels/.gitignore | 0 pygraph/utils/__init__.py | 4 +- pygraph/utils/utils.py | 4 +- tests/README.md | 5 -- tests/opt.py | 66 ---------------- tests/requirements.txt | 16 ---- 22 files changed, 4 insertions(+), 881 deletions(-) delete mode 100644 py-graph/__init__.py delete mode 100644 py-graph/kernels/marginalizedKernel.py delete mode 100644 py-graph/kernels/pathKernel.py delete mode 100644 py-graph/kernels/spkernel.py delete mode 100644 py-graph/utils/__init__.py delete mode 100644 py-graph/utils/graphfiles.py delete mode 100644 py-graph/utils/utils.py delete mode 100644 pygraph/c_ext/Makefile delete mode 100644 pygraph/c_ext/README.md delete mode 100644 pygraph/c_ext/__init__.py delete mode 100644 pygraph/c_ext/lsap.cpp delete mode 100644 pygraph/c_ext/lsape_binders.py delete mode 100644 pygraph/ged/GED.py delete mode 100644 pygraph/ged/__init__.py delete mode 100644 pygraph/ged/bipartiteGED.py delete mode 100644 pygraph/ged/costfunctions.py delete mode 100644 pygraph/kernels/.gitignore delete mode 100644 tests/README.md delete mode 100644 tests/opt.py delete mode 100644 tests/requirements.txt diff --git a/py-graph/__init__.py b/py-graph/__init__.py deleted file mode 100644 index 3eafadb..0000000 --- a/py-graph/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*-coding:utf-8 -*- -""" -Pygraph - -This package contains 4 sub packages : - * c_ext : binders to C++ code - * ged : allows to compute graph edit distance between networkX graphs - * kernels : computation of graph kernels, ie graph similarity measure compatible with SVM - * notebooks : examples of code using this library - * utils : Diverse computation on graphs -""" - -# info -__version__ = "0.1" -__author__ = "Benoit Gaüzère" -__date__ = "November 2017" - -# import sub modules -from pygraph import c_ext -from pygraph import ged -from pygraph import utils diff --git a/py-graph/kernels/marginalizedKernel.py b/py-graph/kernels/marginalizedKernel.py deleted file mode 100644 index 983444f..0000000 --- a/py-graph/kernels/marginalizedKernel.py +++ /dev/null @@ -1,116 +0,0 @@ -import sys -import pathlib -sys.path.insert(0, "../") - -import networkx as nx -import numpy as np -import time - -def marginalizedkernel(*args): - """Calculate marginalized graph kernels between graphs. - - Parameters - ---------- - Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. - / - G1, G2 : NetworkX graphs - 2 graphs between which the kernel is calculated. - p_quit : integer - the termination probability in the random walks generating step - itr : integer - time of iterations to calculate R_inf - - Return - ------ - Kmatrix/Kernel : Numpy matrix/int - Kernel matrix, each element of which is the marginalized kernel between 2 praphs. / Marginalized Kernel between 2 graphs. - - References - ---------- - [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003. - """ - if len(args) == 3: # for a list of graphs - Gn = args[0] - - Kmatrix = np.zeros((len(Gn), len(Gn))) - - start_time = time.time() - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - Kmatrix[i][j] = marginalizedkernel(Gn[i], Gn[j], args[1], args[2]) - Kmatrix[j][i] = Kmatrix[i][j] - - print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time))) - - return Kmatrix - - else: # for only 2 graphs - - # init parameters - G1 = args[0] - G2 = args[1] - p_quit = args[2] # the termination probability in the random walks generating step - itr = args[3] # time of iterations to calculate R_inf - - kernel = 0 - num_nodes_G1 = nx.number_of_nodes(G1) - num_nodes_G2 = nx.number_of_nodes(G2) - p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|) - p_init_G2 = 1 / num_nodes_G2 - - q = p_quit * p_quit - r1 = q - - # initial R_inf - R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes - - # calculate R_inf with a simple interative method - for i in range(1, itr): - R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) - R_inf_new.fill(r1) - - # calculate R_inf for each pair of nodes - for node1 in G1.nodes(data = True): - neighbor_n1 = G1[node1[0]] - p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex) - for node2 in G2.nodes(data = True): - neighbor_n2 = G2[node2[0]] - p_trans_n2 = (1 - p_quit) / len(neighbor_n2) - - for neighbor1 in neighbor_n1: - for neighbor2 in neighbor_n2: - - t = p_trans_n1 * p_trans_n2 * \ - deltaKernel(G1.node[neighbor1]['label'] == G2.node[neighbor2]['label']) * \ - deltaKernel(neighbor_n1[neighbor1]['label'] == neighbor_n2[neighbor2]['label']) - R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8) - - R_inf[:] = R_inf_new - - # add elements of R_inf up and calculate kernel - for node1 in G1.nodes(data = True): - for node2 in G2.nodes(data = True): - s = p_init_G1 * p_init_G2 * deltaKernel(node1[1]['label'] == node2[1]['label']) - kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6) - - return kernel - -def deltaKernel(condition): - """Return 1 if condition holds, 0 otherwise. - - Parameters - ---------- - condition : Boolean - A condition, according to which the kernel is set to 1 or 0. - - Return - ------ - Kernel : integer - Delta Kernel. - - References - ---------- - [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003. - """ - return (1 if condition else 0) \ No newline at end of file diff --git a/py-graph/kernels/pathKernel.py b/py-graph/kernels/pathKernel.py deleted file mode 100644 index 2b1823a..0000000 --- a/py-graph/kernels/pathKernel.py +++ /dev/null @@ -1,68 +0,0 @@ -import sys -import pathlib -sys.path.insert(0, "../") - - -import networkx as nx -import numpy as np -import time - -from utils.utils import getSPGraph - - -def spkernel(*args): - """Calculate shortest-path kernels between graphs. - - Parameters - ---------- - Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. - / - G1, G2 : NetworkX graphs - 2 graphs between which the kernel is calculated. - - Return - ------ - Kmatrix/Kernel : Numpy matrix/int - Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP Kernel between 2 graphs. - - References - ---------- - [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. - """ - if len(args) == 1: # for a list of graphs - Gn = args[0] - - Kmatrix = np.zeros((len(Gn), len(Gn))) - - Sn = [] # get shortest path graphs of Gn - for i in range(0, len(Gn)): - Sn.append(getSPGraph(Gn[i])) - - start_time = time.time() - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - for e1 in Sn[i].edges(data = True): - for e2 in Sn[j].edges(data = True): - if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - Kmatrix[i][j] += 1 - Kmatrix[j][i] += (0 if i == j else 1) - - print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time))) - - return Kmatrix - - else: # for only 2 graphs - G1 = args[0] - G2 = args[1] - - kernel = 0 - - for e1 in G1.edges(data = True): - for e2 in G2.edges(data = True): - if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - kernel += 1 - - print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time)) - - return kernel \ No newline at end of file diff --git a/py-graph/kernels/spkernel.py b/py-graph/kernels/spkernel.py deleted file mode 100644 index 2b1823a..0000000 --- a/py-graph/kernels/spkernel.py +++ /dev/null @@ -1,68 +0,0 @@ -import sys -import pathlib -sys.path.insert(0, "../") - - -import networkx as nx -import numpy as np -import time - -from utils.utils import getSPGraph - - -def spkernel(*args): - """Calculate shortest-path kernels between graphs. - - Parameters - ---------- - Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. - / - G1, G2 : NetworkX graphs - 2 graphs between which the kernel is calculated. - - Return - ------ - Kmatrix/Kernel : Numpy matrix/int - Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP Kernel between 2 graphs. - - References - ---------- - [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. - """ - if len(args) == 1: # for a list of graphs - Gn = args[0] - - Kmatrix = np.zeros((len(Gn), len(Gn))) - - Sn = [] # get shortest path graphs of Gn - for i in range(0, len(Gn)): - Sn.append(getSPGraph(Gn[i])) - - start_time = time.time() - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - for e1 in Sn[i].edges(data = True): - for e2 in Sn[j].edges(data = True): - if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - Kmatrix[i][j] += 1 - Kmatrix[j][i] += (0 if i == j else 1) - - print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time))) - - return Kmatrix - - else: # for only 2 graphs - G1 = args[0] - G2 = args[1] - - kernel = 0 - - for e1 in G1.edges(data = True): - for e2 in G2.edges(data = True): - if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - kernel += 1 - - print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time)) - - return kernel \ No newline at end of file diff --git a/py-graph/utils/__init__.py b/py-graph/utils/__init__.py deleted file mode 100644 index eef7c86..0000000 --- a/py-graph/utils/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -*-coding:utf-8 -*- -"""Pygraph - utils module - -Implement some methods to manage graphs - graphfiles.py : load .gxl and .ct files - utils.py : compute some properties on networkX graphs - - -""" - -# info -__version__ = "0.1" -__author__ = "Benoit Gaüzère" -__date__ = "November 2017" - -from utils import graphfiles -from utils import utils diff --git a/py-graph/utils/graphfiles.py b/py-graph/utils/graphfiles.py deleted file mode 100644 index c0ab9a3..0000000 --- a/py-graph/utils/graphfiles.py +++ /dev/null @@ -1,87 +0,0 @@ -import networkx as nx - -def loadCT(filename): - """load data from .ct file. - - Notes - ------ - a typical example of data in .ct is like this: - - 3 2 <- number of nodes and edges - 0.0000 0.0000 0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element @Q what are the first 3 numbers? - 0.0000 0.0000 0.0000 C - 0.0000 0.0000 0.0000 O - 1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. @Q what are the 3th numbers? - 2 3 1 1 - """ - content = open(filename).read().splitlines() - G = nx.Graph(name=str(content[0])) # set name of the graph - tmp = content[1].split(" ") - if tmp[0] == '': - nb_nodes = int(tmp[1]) # number of the nodes - nb_edges = int(tmp[2]) # number of the edges - else: - nb_nodes = int(tmp[0]) - nb_edges = int(tmp[1]) - - for i in range(0, nb_nodes): - tmp = content[i + 2].split(" ") - tmp = [x for x in tmp if x != ''] - G.add_node(i, label=tmp[3]) - - for i in range(0, nb_edges): - tmp = content[i + G.number_of_nodes() + 2].split(" ") - tmp = [x for x in tmp if x != ''] - G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3])) - return G - - -def loadGXL(filename): - import networkx as nx - import xml.etree.ElementTree as ET - - tree = ET.parse(filename) - root = tree.getroot() - index = 0 - G = nx.Graph() - dic={} - for node in root.iter('node'): - label = node.find('attr')[0].text - dic[node.attrib['id']] = index - G.add_node(index, id=node.attrib['id'], label=label) - index += 1 - - for edge in root.iter('edge'): - label = edge.find('attr')[0].text - G.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], label=label) - return G - -def loadDataset(filename): - """load file list of the dataset. - """ - from os.path import dirname, splitext - - dirname_dataset = dirname(filename) - extension = splitext(filename)[1][1:] - data = [] - y = [] - if(extension == "ds"): - content = open(filename).read().splitlines() - for i in range(0, len(content)): - tmp = content[i].split(' ') - data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the '#'s in file names - y.append(float(tmp[1])) - elif(extension == "cxl"): - import xml.etree.ElementTree as ET - - tree = ET.parse(filename) - root = tree.getroot() - data = [] - y = [] - for graph in root.iter('print'): - mol_filename = graph.attrib['file'] - mol_class = graph.attrib['class'] - data.append(loadGXL(dirname_dataset + '/' + mol_filename)) - y.append(mol_class) - - return data, y diff --git a/py-graph/utils/utils.py b/py-graph/utils/utils.py deleted file mode 100644 index 52a85f1..0000000 --- a/py-graph/utils/utils.py +++ /dev/null @@ -1,59 +0,0 @@ -import networkx as nx -import numpy as np - - -def getSPLengths(G1): - sp = nx.shortest_path(G1) - distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes())) - for i in np.keys(): - for j in np[i].keys(): - distances[i, j] = len(sp[i][j])-1 - return distances - -def getSPGraph(G): - """Transform graph G to its corresponding shortest-paths graph. - - Parameters - ---------- - G : NetworkX graph - The graph to be tramsformed. - - Return - ------ - S : NetworkX graph - The shortest-paths graph corresponding to G. - - Notes - ------ - For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes. - - References - ---------- - [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. - """ - return floydTransformation(G) - -def floydTransformation(G): - """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation. - - Parameters - ---------- - G : NetworkX graph - The graph to be tramsformed. - - Return - ------ - S : NetworkX graph - The shortest-paths graph corresponding to G. - - References - ---------- - [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. - """ - spMatrix = nx.floyd_warshall_numpy(G) # @todo weigth label not considered - S = nx.Graph() - S.add_nodes_from(G.nodes(data=True)) - for i in range(0, G.number_of_nodes()): - for j in range(0, G.number_of_nodes()): - S.add_edge(i, j, cost = spMatrix[i, j]) - return S diff --git a/pygraph/c_ext/Makefile b/pygraph/c_ext/Makefile deleted file mode 100644 index 71c3eb6..0000000 --- a/pygraph/c_ext/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# You must specify your env variable LSAPE_DIR -#LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/ - -liblsap.so:lsap.cpp - g++ -fPIC -I/home/bgauzere/Téléchargements/lsape/include/ -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR) diff --git a/pygraph/c_ext/README.md b/pygraph/c_ext/README.md deleted file mode 100644 index f1aa549..0000000 --- a/pygraph/c_ext/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Python wrapper for lsape method - -Specify your LSAPE_DIR env variable with the location of the source -code to compile - -source code : https://bougleux.users.greyc.fr/lsape/ diff --git a/pygraph/c_ext/__init__.py b/pygraph/c_ext/__init__.py deleted file mode 100644 index 7101e7e..0000000 --- a/pygraph/c_ext/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -*-coding:utf-8 -*- -"""Pygraph - c_ext module - -This package binds some C++ code to python - -lsape_binders.py : binders to C++ code of LSAPE methods implemented in -https://bougleux.users.greyc.fr/lsape/ - -""" - -# info -__version__ = "0.1" -__author__ = "Benoit Gaüzère" -__date__ = "November 2017" - -# import sub modules -from pygraph.c_ext import lsape_binders diff --git a/pygraph/c_ext/lsap.cpp b/pygraph/c_ext/lsap.cpp deleted file mode 100644 index 712a092..0000000 --- a/pygraph/c_ext/lsap.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* -Python wrapper -*/ - -#include "hungarian-lsape.hh" -#include "hungarian-lsap.hh" - -#include - -extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){ - double * u = new double[nm]; - double * v = new double[nm]; - - int * rho_int = new int[nm]; - int * varrho_int = new int[nm]; - - hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int); - //Find a better way to do - for (int i =0;i= m): - ged += cf.cnd(i, G1) - else: - ged += cf.cns(i, phi_i, G1, G2) - for j in G2.nodes(): - phi_j = varrho[j] - if(phi_j >= n): - ged += cf.cni(j, G2) - - for e in G1.edges(data=True): - i = e[0] - j = e[1] - phi_i = rho[i] - phi_j = rho[j] - if (phi_i < m) and (phi_j < m): - mappedEdge = len(list(filter(lambda x: True if - x == phi_j else False, G2[phi_i]))) - if(mappedEdge): - e2 = [phi_i, phi_j, G2[phi_i][phi_j]] - min_cost = min(cf.ces(e, e2, G1, G2), - cf.ced(e, G1) + cf.cei(e2, G2)) - ged += min_cost - else: - ged += cf.ced(e, G1) - else: - ged += cf.ced(e, G1) - for e in G2.edges(data=True): - i = e[0] - j = e[1] - phi_i = varrho[i] - phi_j = varrho[j] - if (phi_i < n) and (phi_j < n): - mappedEdge = len(list(filter(lambda x: True if x == phi_j - else False, G1[phi_i]))) - if(not mappedEdge): - ged += cf.cei(e, G2) - else: - ged += cf.ced(e, G2) - return ged, rho, varrho diff --git a/pygraph/ged/__init__.py b/pygraph/ged/__init__.py deleted file mode 100644 index 7f5b1bc..0000000 --- a/pygraph/ged/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -*-coding:utf-8 -*- -"""Pygraph - ged module - -Implement some methods to compute ged between graphs - - -""" - -# info -__version__ = "0.1" -__author__ = "Benoit Gaüzère" -__date__ = "November 2017" - -from pygraph.ged import costfunctions -from pygraph.ged import bipartiteGED -from pygraph.ged import GED - diff --git a/pygraph/ged/bipartiteGED.py b/pygraph/ged/bipartiteGED.py deleted file mode 100644 index b997f9e..0000000 --- a/pygraph/ged/bipartiteGED.py +++ /dev/null @@ -1,33 +0,0 @@ -import numpy as np -from scipy.optimize import linear_sum_assignment -from pygraph.ged.costfunctions import ConstantCostFunction - - -def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)): - """Compute a Cost Matrix according to cost function cf""" - n = G1.number_of_nodes() - m = G2.number_of_nodes() - nm = n + m - C = np.ones([nm, nm])*np.inf - C[n:, m:] = 0 - - for u in G1.nodes(): - for v in G2.nodes(): - cost = cf.cns(u, v, G1, G2) - C[u, v] = cost - - for v in G1.nodes(): - C[v, m + v] = cf.cnd(v, G1) - - for v in G2.nodes(): - C[n + v, v] = cf.cni(v, G2) - return C - - -def getOptimalMapping(C, lsap_solver=linear_sum_assignment): - """Compute an optimal linear mapping according to cost Matrix C - inclure les progs C de Seb - - """ - row_ind, col_ind = lsap_solver(C) - return col_ind, row_ind[np.argsort(col_ind)] diff --git a/pygraph/ged/costfunctions.py b/pygraph/ged/costfunctions.py deleted file mode 100644 index 28318de..0000000 --- a/pygraph/ged/costfunctions.py +++ /dev/null @@ -1,138 +0,0 @@ -import numpy as np -from scipy.optimize import linear_sum_assignment - - -class ConstantCostFunction: - """ Define a symmetric constant cost fonction for edit operations """ - def __init__(self, cns, cni, ces, cei): - self.cns_ = cns - self.cni_ = self.cnd_ = cni - self.ces_ = ces - self.cei_ = self.ced_ = cei - - def cns(self, node_u, node_v, g1, g2): - """ return substitution edit operation cost between node_u of G1 and node_v of G2""" - return (g1.node[node_u]['label'] != g2.node[node_v]['label'])*self.cns_ - - def cnd(self, u, G1): - return self.cnd_ - - def cni(self, v, G2): - return self.cni_ - - def ces(self, e1, e2, G1, G2): - """tester avec des attributs autres que symboliques en testant - l'operateur __eq__""" - return (e1[2]['label'] != e2[2]['label'])*self.ces_ - - def ced(self, e1, G1): - return self.ced_ - - def cei(self, e2, G2): - return self.cei_ - - -class RiesenCostFunction(): - """ Cost function associated to the computation of a cost matrix between nodes for LSAP""" - def __init__(self, cf, lsap_solver=linear_sum_assignment): - self.cf_ = cf - self.lsap_solver_ = lsap_solver - - def cns(self, u, v, G1, G2): - """ u et v sont des id de noeuds """ - n = len(G1[u]) - m = len(G2[v]) - sub_C = np.ones([n+m, n+m]) * np.inf - sub_C[n:, m:] = 0 - i = 0 - l_nbr_u = G1[u] - l_nbr_v = G2[v] - for nbr_u in l_nbr_u: - j = 0 - e1 = [u, nbr_u, G1[u][nbr_u]] - for nbr_v in G2[v]: - e2 = [v, nbr_v, G2[v][nbr_v]] - sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2) - j += 1 - i += 1 - - i = 0 - for nbr_u in l_nbr_u: - sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1) - i += 1 - - j = 0 - for nbr_v in l_nbr_v: - sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2) - j += 1 - row_ind, col_ind = self.lsap_solver_(sub_C) - cost = np.sum(sub_C[row_ind, col_ind]) - return self.cf_.cns(u, v, G1, G2) + cost - - def cnd(self, u, G1): - cost = 0 - for nbr in G1[u]: - cost += self.cf_.ced([u,nbr,G1[u][nbr]],G1) - - return self.cf_.cnd(u,G1) + cost - - def cni(self, v, G2): - cost = 0 - for nbr in G2[v]: - cost += self.cf_.cei([v,nbr,G2[v][nbr]], G2) - - return self.cf_.cni(v, G2) + cost - - -class NeighboorhoodCostFunction(): - """ Cost function associated to the computation of a cost matrix between nodes for LSAP""" - def __init__(self, cf, lsap_solver=linear_sum_assignment): - self.cf_ = cf - self.lsap_solver_ = lsap_solver - - def cns(self, u, v, G1, G2): - """ u et v sont des id de noeuds """ - n = len(G1[u]) - m = len(G2[v]) - sub_C = np.ones([n+m, n+m]) * np.inf - sub_C[n:, m:] = 0 - i = 0 - l_nbr_u = G1[u] - l_nbr_v = G2[v] - for nbr_u in l_nbr_u: - j = 0 - e1 = [u, nbr_u, G1[u][nbr_u]] - for nbr_v in G2[v]: - e2 = [v, nbr_v, G2[v][nbr_v]] - sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2) - sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2) - j += 1 - i += 1 - - i = 0 - for nbr_u in l_nbr_u: - sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1) - sub_C[i, m+i] += self.cf_.cnd(nbr_u, G1) - i += 1 - - j = 0 - for nbr_v in l_nbr_v: - sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2) - sub_C[n+j, j] += self.cf_.cni(nbr_v, G2) - j += 1 - - row_ind, col_ind = self.lsap_solver_(sub_C) - cost = np.sum(sub_C[row_ind, col_ind]) - return self.cf_.cns(u, v, G1, G2) + cost - - def cnd(self, u, G1): - cost = 0 - for nbr in G1[u]: - cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1) - return self.cf_.cnd(u, G1) + cost - - def cni(self, v, G2): - cost = 0 - for nbr in G2[v]: - cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2) - return self.cf_.cni(v, G2) + cost diff --git a/pygraph/kernels/.gitignore b/pygraph/kernels/.gitignore deleted file mode 100644 index e69de29..0000000 diff --git a/pygraph/utils/__init__.py b/pygraph/utils/__init__.py index 501d1aa..eef7c86 100644 --- a/pygraph/utils/__init__.py +++ b/pygraph/utils/__init__.py @@ -13,5 +13,5 @@ __version__ = "0.1" __author__ = "Benoit Gaüzère" __date__ = "November 2017" -from pygraph.utils import graphfiles -from pygraph.utils import utils +from utils import graphfiles +from utils import utils diff --git a/pygraph/utils/utils.py b/pygraph/utils/utils.py index 0c7bf60..52a85f1 100644 --- a/pygraph/utils/utils.py +++ b/pygraph/utils/utils.py @@ -5,8 +5,8 @@ import numpy as np def getSPLengths(G1): sp = nx.shortest_path(G1) distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes())) - for i in sp.keys(): - for j in sp[i].keys(): + for i in np.keys(): + for j in np[i].keys(): distances[i, j] = len(sp[i][j])-1 return distances diff --git a/tests/README.md b/tests/README.md deleted file mode 100644 index 8f42f9e..0000000 --- a/tests/README.md +++ /dev/null @@ -1,5 +0,0 @@ -To use the library : -$> virtualenv --python=/usr/bin/python3.5 venv -$> pip install -r requirements.txt -$> source venv/bin/activate -... Go use pygraph diff --git a/tests/opt.py b/tests/opt.py deleted file mode 100644 index f650bbf..0000000 --- a/tests/opt.py +++ /dev/null @@ -1,66 +0,0 @@ -import ot -import sys -import pathlib -sys.path.insert(0, "../") - -from pygraph.utils.graphfiles import loadDataset -from pygraph.ged.costfunctions import ConstantCostFunction -from pygraph.utils.utils import getSPLengths -from tqdm import tqdm -import numpy as np -from scipy.optimize import linear_sum_assignment -from pygraph.ged.GED import ged -import scipy - -def pad(C, n): - C_pad = np.zeros((n, n)) - C_pad[:C.shape[0], :C.shape[1]] = C - return C_pad - -if (__name__ == "__main__"): - ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds" - dataset, y = loadDataset(ds_filename) - cf = ConstantCostFunction(1, 3, 1, 3) - N = len(dataset) - - pairs = list() - - ged_distances = list() #np.zeros((N, N)) - gw_distances = list() #np.zeros((N, N)) - for i in tqdm(range(0, N)): - for j in tqdm(range(i, N)): - G1 = dataset[i] - G2 = dataset[j] - n = G1.number_of_nodes() - m = G2.number_of_nodes() - if(n == m): - C1 = getSPLengths(G1) - C2 = getSPLengths(G2) - - C1 /= C1.max() - C2 /= C2.max() - - dim = max(n, m) - if(n < m): - C1 = pad(C1, dim) - elif (m < n): - C2 = pad(C2, dim) - - p = ot.unif(dim) - q = ot.unif(dim) - - gw = ot.gromov_wasserstein(C1, C2, p, q, - 'square_loss', epsilon=5e-3) - row_ind, col_ind = linear_sum_assignment(-gw) - rho = col_ind - varrho = row_ind[np.argsort(col_ind)] - pairs.append((i,j)) - gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0]) - - ged_distances.append(ged(G1, G2, cf=cf)[0]) - - print("Moyenne sur Riesen : {}".format(np.mean(ged_distances))) - print("Moyenne sur GW : {} ".format(np.mean(gw_distances))) - - np.save("distances_riesen", ged_distances) - np.save("distances_gw", gw_distances) diff --git a/tests/requirements.txt b/tests/requirements.txt deleted file mode 100644 index 9505125..0000000 --- a/tests/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -cycler==0.10.0 -Cython==0.27.3 -decorator==4.1.2 -matplotlib==2.1.0 -networkx==2.0 -numpy==1.13.3 -pkg-resources==0.0.0 -POT==0.4.0 -pyparsing==2.2.0 -python-dateutil==2.6.1 -pytz==2017.3 -scikit-learn==0.19.1 -scipy==1.0.0 -six==1.11.0 -sklearn==0.0 -tqdm==4.19.4