@@ -0,0 +1,74 @@ | |||
from ged.costfunctions import BasicCostFunction, RiesenCostFunction | |||
from ged.costfunctions import NeighboorhoodCostFunction | |||
from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||
def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=BasicCostFunction(1, 3, 1, 3)):
    """Compute the graph edit distance between G1 and G2 for a node mapping.

    The mapping is encoded by ``rho`` (G1 -> G2) and ``varrho`` (G2 -> G1).
    Nodes of both graphs must be integer ids usable as indices into ``rho``
    and ``varrho``.  ``rho[i] >= m`` (``m`` = number of nodes of G2) means
    node ``i`` of G1 is deleted; ``varrho[j] >= n`` (``n`` = number of nodes
    of G1) means node ``j`` of G2 is inserted.

    When either ``rho`` or ``varrho`` is None, a mapping is computed from the
    bipartite cost matrix built with the cost function selected by
    ``method``: 'Riesen', 'Neighboorhood' or 'Basic'.

    NB: use dictionaries for the mappings to be more versatile?

    Parameters:
        G1, G2: graphs exposing ``nodes()``, ``edges(data=True)``,
            ``number_of_nodes()`` and adjacency access ``G[u][v]``.
        method: name of the bipartite cost function used when no mapping is
            supplied.
        rho, varrho: optional precomputed mappings.
        cf: cost function used to price the edit operations of the mapping.

    Returns:
        A tuple ``(cost, rho, varrho)``.

    Raises:
        NameError: if a mapping must be computed and ``method`` is unknown.
    """
    if (rho is None) or (varrho is None):
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')
        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp))

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    total_cost = 0

    # Node costs: every G1 node is either substituted or deleted, and every
    # G2 node without a pre-image in G1 is inserted.
    # nodes()/edges() are used instead of nodes_iter()/edges_iter() because
    # the *_iter variants only exist in NetworkX 1.x.
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            total_cost += cf.cnd(i, G1)
        else:
            total_cost += cf.cns(i, phi_i, G1, G2)
    for j in G2.nodes():
        if varrho[j] >= n:
            total_cost += cf.cni(j, G2)

    # Edges of G1: substituted when both endpoints are mapped onto adjacent
    # nodes of G2, deleted otherwise.
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if phi_i < m and phi_j < m and phi_j in G2[phi_i]:
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # NOTE(review): the cheapest of substitution, deletion alone and
            # insertion alone is charged; confirm this is intended rather
            # than min(substitution, deletion + insertion).
            total_cost += min(cf.ces(e, e2, G1, G2),
                              cf.ced(e, G1), cf.cei(e2, G2))
        else:
            total_cost += cf.ced(e, G1)

    # Edges of G2 that have no counterpart in G1 are inserted.  This includes
    # edges touching an inserted node: they are priced with the insertion
    # cost of the G2 edge (the deletion cost was used here previously).
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if phi_i >= n or phi_j >= n or phi_j not in G1[phi_i]:
            total_cost += cf.cei(e, G2)

    return total_cost, rho, varrho
def computeDistanceMatrix(dataset):
    """Placeholder: not implemented yet.

    The argument is ignored and None is returned.
    """
    return None
@@ -0,0 +1,33 @@ | |||
import numpy as np | |||
from scipy.optimize import linear_sum_assignment | |||
from ged.costfunctions import BasicCostFunction | |||
def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)):
    """Build the bipartite assignment cost matrix of G1 and G2 under ``cf``.

    With ``n`` and ``m`` the number of nodes of G1 and G2, the returned
    matrix ``C`` has shape ``(n + m, n + m)``:

    * ``C[u, v]`` is the cost of substituting node ``u`` of G1 by node ``v``
      of G2;
    * ``C[u, m + u]`` is the cost of deleting node ``u`` of G1;
    * ``C[n + v, v]`` is the cost of inserting node ``v`` of G2;
    * ``C[n:, m:]`` is zero;
    * every other entry is ``inf``.

    Node ids are used directly as row/column indices, so they must be the
    integers ``0 .. n-1`` (resp. ``0 .. m-1``).

    Parameters:
        G1, G2: graphs exposing ``nodes()`` and ``number_of_nodes()``.
        cf: cost function providing ``cns``, ``cnd`` and ``cni``.

    Returns:
        The cost matrix as a 2-D numpy array.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    nm = n + m
    C = np.ones([nm, nm]) * np.inf
    C[n:, m:] = 0

    # nodes() works with both NetworkX 1.x and 2.x, whereas nodes_iter() only
    # exists in 1.x.
    for u in G1.nodes():
        for v in G2.nodes():
            C[u, v] = cf.cns(u, v, G1, G2)

    for u in G1.nodes():
        C[u, m + u] = cf.cnd(u, G1)

    for v in G2.nodes():
        C[n + v, v] = cf.cni(v, G2)

    return C
def getOptimalMapping(C):
    """Solve the linear sum assignment problem defined by cost matrix C.

    TODO: integrate Seb's C programs.

    Returns:
        A pair ``(rho, varrho)``: ``rho`` holds the column assigned to each
        row returned by the solver, and ``varrho`` holds the solver's row
        indices reordered by ascending assigned column.
    """
    rows, cols = linear_sum_assignment(C)
    by_column = np.argsort(cols)
    reverse_mapping = rows[by_column]
    return cols, reverse_mapping
@@ -0,0 +1,133 @@ | |||
import numpy as np | |||
from scipy.optimize import linear_sum_assignment | |||
class BasicCostFunction:
    """Constant edit costs driven by the 'label' attribute of nodes and edges.

    Parameters:
        cns: cost of substituting two nodes whose labels differ.
        cni: cost of inserting a node; also used as node deletion cost.
        ces: cost of substituting two edges whose labels differ.
        cei: cost of inserting an edge; also used as edge deletion cost.
    """

    def __init__(self, cns, cni, ces, cei):
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    @staticmethod
    def _node_attrs(G, u):
        """Return the attribute dict of node ``u`` of ``G``.

        ``G.node`` is tried first (NetworkX 1.x and early 2.x); graphs that
        no longer expose it (it was removed in NetworkX 2.4) are read through
        ``G.nodes`` instead.
        """
        try:
            node_view = G.node
        except AttributeError:
            node_view = G.nodes
        return node_view[u]

    def cns(self, u, v, G1, G2):
        """Node substitution cost: ``cns_`` if the labels differ, else 0."""
        label_u = BasicCostFunction._node_attrs(G1, u)['label']
        label_v = BasicCostFunction._node_attrs(G2, v)['label']
        return (label_u != label_v) * self.cns_

    def cnd(self, u, G1):
        """Node deletion cost (constant)."""
        return self.cnd_

    def cni(self, v, G2):
        """Node insertion cost (constant)."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Edge substitution cost: ``ces_`` if the labels differ, else 0.

        ``e1`` and ``e2`` are ``(u, v, attributes)`` triples; only
        ``attributes['label']`` is read.

        TODO: try with non-symbolic attributes by testing the ``__eq__``
        operator.
        """
        return (e1[2]['label'] != e2[2]['label']) * self.ces_

    def ced(self, e1, G1):
        """Edge deletion cost (constant)."""
        return self.ced_

    def cei(self, e2, G2):
        """Edge insertion cost (constant)."""
        return self.cei_
class RiesenCostFunction(BasicCostFunction):
    """Node costs augmented with the cost of editing the incident edges.

    The elementary costs are copied from the cost function ``cf`` given to
    the constructor.
    """

    def __init__(self, cf):
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Substitution cost of node ids u (in G1) and v (in G2).

        Adds to the basic node substitution cost the optimal cost of
        assigning the edges incident to u onto the edges incident to v.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # Same block layout as the node-level matrix: substitutions in the
        # top-left block, deletions and insertions on the diagonals of the
        # off-diagonal blocks, zeros in the bottom-right block.
        sub_C = np.ones([n + m, n + m]) * np.inf
        sub_C[n:, m:] = 0

        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, nbrs_u[nbr_u]]
            for j, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, nbrs_v[nbr_v]]
                sub_C[i, j] = self.ces(e1, e2, G1, G2)

        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m + i] = self.ced([u, nbr_u, nbrs_u[nbr_u]], G1)

        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n + j, j] = self.cei([v, nbr_v, nbrs_v[nbr_v]], G2)

        rows, cols = linear_sum_assignment(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return BasicCostFunction.cns(self, u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Deletion cost of node u plus deletion of all its incident edges."""
        edges_cost = sum(
            BasicCostFunction.ced(self, [u, nbr, attrs], G1)
            for nbr, attrs in G1[u].items())
        return BasicCostFunction.cnd(self, u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of node v plus insertion of all its incident edges."""
        edges_cost = sum(
            BasicCostFunction.cei(self, [v, nbr, attrs], G2)
            for nbr, attrs in G2[v].items())
        return BasicCostFunction.cni(self, v, G2) + edges_cost
class NeighboorhoodCostFunction(BasicCostFunction):
    """Node costs augmented with the cost of editing the neighbourhood.

    Compared with the basic node costs, the substitution cost also accounts
    for incident edges together with the neighbour nodes they lead to.  The
    elementary costs are copied from the cost function ``cf`` given to the
    constructor.
    """

    def __init__(self, cf):
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Substitution cost of node ids u (in G1) and v (in G2).

        Adds to the basic node substitution cost the optimal cost of
        assigning each (incident edge, neighbour) pair of u onto those of v.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        sub_C = np.ones([n + m, n + m]) * np.inf
        sub_C[n:, m:] = 0

        # Substitution block: edge substitution plus basic substitution of
        # the two neighbour nodes.
        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, nbrs_u[nbr_u]]
            for j, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, nbrs_v[nbr_v]]
                sub_C[i, j] = self.ces(e1, e2, G1, G2)
                sub_C[i, j] += BasicCostFunction.cns(self,
                                                     nbr_u, nbr_v, G1, G2)

        # Deletion diagonal: edge deletion plus basic neighbour deletion.
        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m + i] = self.ced([u, nbr_u, nbrs_u[nbr_u]], G1)
            sub_C[i, m + i] += BasicCostFunction.cnd(self, nbr_u, G1)

        # Insertion diagonal: edge insertion plus basic neighbour insertion.
        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n + j, j] = self.cei([v, nbr_v, nbrs_v[nbr_v]], G2)
            sub_C[n + j, j] += BasicCostFunction.cni(self, nbr_v, G2)

        rows, cols = linear_sum_assignment(sub_C)
        neighbourhood_cost = np.sum(sub_C[rows, cols])
        return BasicCostFunction.cns(self, u, v, G1, G2) + neighbourhood_cost

    def cnd(self, u, G1):
        """Deletion cost of node u plus deletion of all its incident edges."""
        edges_cost = sum(
            BasicCostFunction.ced(self, [u, nbr, attrs], G1)
            for nbr, attrs in G1[u].items())
        return BasicCostFunction.cnd(self, u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of node v plus insertion of all its incident edges."""
        edges_cost = sum(
            BasicCostFunction.cei(self, [v, nbr, attrs], G2)
            for nbr, attrs in G2[v].items())
        return BasicCostFunction.cni(self, v, G2) + edges_cost
@@ -0,0 +1,3 @@ | |||
import pathlib
import sys

# Put "../" at the front of the module search path so that it is searched
# before every other entry.
# NOTE(review): "../" is resolved against the process working directory, not
# against this file's location, and `pathlib` is imported but not used here --
# presumably a file-relative path was intended; confirm before changing.
sys.path.insert(0, "../")
@@ -0,0 +1,74 @@ | |||
import networkx as nx | |||
def loadCT(filename):
    """Load a graph from a ``.ct`` file into a ``networkx.Graph``.

    Layout read by this function:

    * line 1: graph name;
    * line 2: number of nodes, then number of edges;
    * one line per node, whose 4th whitespace-separated field is the node
      label;
    * one line per edge, whose first two fields are 1-based node numbers and
      whose 4th field is the integer edge label.

    Nodes are numbered from 0 in file order and carry a ``label`` attribute;
    edges carry an integer ``label`` attribute.

    Parameters:
        filename: path of the file to read.

    Returns:
        The loaded graph.
    """
    # Close the file deterministically instead of relying on the garbage
    # collector.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))

    # split() without argument tolerates any amount of leading or
    # intermediate whitespace on the counts line.
    counts = content[1].split()
    nb_nodes = int(counts[0])
    nb_edges = int(counts[1])

    for i in range(nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    first_edge_line = nb_nodes + 2
    for i in range(nb_edges):
        fields = content[first_edge_line + i].split()
        # Node numbers are 1-based in the file and 0-based in the graph.
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1,
                   label=int(fields[3]))
    return G
def loadGXL(filename):
    """Load a graph from a GXL (XML) file into a ``networkx.Graph``.

    Every ``node`` element becomes a graph node numbered from 0 in document
    order, keeping its GXL identifier in the ``id`` attribute.  Node and edge
    labels are the text of the first child of the element's first ``attr``
    child.

    Parameters:
        filename: path of the GXL file.

    Returns:
        The loaded graph.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()

    # Maps GXL node identifiers to the integer node ids used in the graph.
    index_of = {}
    for index, node in enumerate(root.iter('node')):
        label = node.find('attr')[0].text
        index_of[node.attrib['id']] = index
        G.add_node(index, id=node.attrib['id'], label=label)

    for edge in root.iter('edge'):
        label = edge.find('attr')[0].text
        source = index_of[edge.attrib['from']]
        target = index_of[edge.attrib['to']]
        G.add_edge(source, target, label=label)
    return G
def loadDataset(filename):
    """Load a dataset of graphs and their target values.

    Two index formats are recognised from the file extension:

    * ``.ds``: each non-blank line holds a ``.ct`` file name followed by a
      numeric target; graphs are read with ``loadCT`` and targets are
      converted to ``float``.
    * ``.cxl``: an XML file whose ``print`` elements carry ``file`` and
      ``class`` attributes; graphs are read with ``loadGXL`` and targets are
      the ``class`` strings.

    Graph file names are resolved relative to the directory of ``filename``.
    Any other extension yields two empty lists.

    Parameters:
        filename: path of the dataset index file.

    Returns:
        A tuple ``(data, y)`` of the list of graphs and the list of targets.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        with open(filename) as ds_file:
            lines = ds_file.read().splitlines()
        for line in lines:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            # join() keeps the path relative when the dataset file has no
            # directory component, instead of anchoring it at "/".
            data.append(loadCT(join(dirname_dataset, fields[0])))
            y.append(float(fields[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET
        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)
    return data, y
@@ -0,0 +1,10 @@ | |||
import networkx as nx | |||
import numpy as np | |||
def getSPLengths(G1):
    """Return the matrix of shortest-path lengths between all nodes of G1.

    Entry ``[i, j]`` is the number of edges on the shortest path from ``i``
    to ``j`` found by ``networkx.shortest_path``.  Pairs for which no path is
    reported keep the initial value 0.  Node ids are used directly as matrix
    indices, so they must be the integers ``0 .. n-1``.

    Parameters:
        G1: the graph to analyse.

    Returns:
        An ``(n, n)`` numpy array, ``n`` being the number of nodes of G1.
    """
    # dict() accepts both a mapping and an iterable of (node, paths) pairs.
    sp = dict(nx.shortest_path(G1))
    nb_nodes = G1.number_of_nodes()
    distances = np.zeros((nb_nodes, nb_nodes))
    # Iterate over the computed paths (the numpy module itself was being
    # iterated here before).
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # A path lists its nodes, so its length in edges is one less.
            distances[i, j] = len(path) - 1
    return distances