@@ -0,0 +1,74 @@ | |||||
from ged.costfunctions import BasicCostFunction, RiesenCostFunction | |||||
from ged.costfunctions import NeighboorhoodCostFunction | |||||
from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping | |||||
def ged(G1, G2, method='Riesen', rho=None, varrho=None,
        cf=BasicCostFunction(1, 3, 1, 3)):
    """Compute the graph edit distance between G1 and G2 for a node mapping.

    The mapping is encoded by ``rho`` (looked up with G1 nodes) and
    ``varrho`` (looked up with G2 nodes).  Nodes of both graphs must be
    identified by the index used to access ``rho`` and ``varrho``.  With
    ``n`` and ``m`` the node counts of G1 and G2, ``rho[i] >= m`` means that
    node ``i`` of G1 is deleted and ``varrho[j] >= n`` means that node ``j``
    of G2 is inserted.

    NB: should dictionaries be used for the mappings to be more versatile?

    Args:
        G1: first graph (networkx ``Graph``-like interface).
        G2: second graph (networkx ``Graph``-like interface).
        method: bipartite cost flavour used when no mapping is supplied:
            'Riesen', 'Neighboorhood' or 'Basic'.
        rho: mapping from G1 nodes, or None to compute it.
        varrho: mapping from G2 nodes, or None to compute it.
        cf: cost function pricing the elementary edit operations.

    Returns:
        Tuple ``(distance, rho, varrho)``.

    Raises:
        NameError: if a mapping must be computed and ``method`` is unknown.
    """
    if rho is None or varrho is None:
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')
        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp))

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    total = 0

    # Nodes of G1 are either deleted or substituted by their image in G2.
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            total += cf.cnd(i, G1)
        else:
            total += cf.cns(i, phi_i, G1, G2)

    # Nodes of G2 that have no pre-image in G1 are inserted.
    for j in G2.nodes():
        if varrho[j] >= n:
            total += cf.cni(j, G2)

    # Edges of G1: substituted when both endpoints are mapped onto an
    # existing edge of G2, deleted otherwise.
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if phi_i < m and phi_j < m and phi_j in G2[phi_i]:
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # Replacing e by e2 through removal + insertion costs both
            # operations, so compare the substitution against their sum.
            total += min(cf.ces(e, e2, G1, G2),
                         cf.ced(e, G1) + cf.cei(e2, G2))
        else:
            total += cf.ced(e, G1)

    # Edges of G2 that are not the image of an edge of G1 are inserted;
    # the others were already priced in the loop above.
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if phi_i < n and phi_j < n and phi_j in G1[phi_i]:
            continue
        total += cf.cei(e, G2)

    return total, rho, varrho
def computeDistanceMatrix(dataset):
    """Placeholder that is not implemented yet.

    The ``dataset`` argument is currently ignored and None is returned.
    """
    return None
@@ -0,0 +1,33 @@ | |||||
import numpy as np | |||||
from scipy.optimize import linear_sum_assignment | |||||
from ged.costfunctions import BasicCostFunction | |||||
def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)):
    """Build the bipartite cost matrix of G1 and G2 for cost function cf.

    With ``n`` and ``m`` the node counts of G1 and G2, the returned
    ``(n + m) x (n + m)`` float matrix ``C`` is laid out as follows:

    * ``C[u, v]`` is the substitution cost of node ``u`` of G1 by node
      ``v`` of G2;
    * ``C[u, m + u]`` is the deletion cost of node ``u`` of G1;
    * ``C[n + v, v]`` is the insertion cost of node ``v`` of G2;
    * ``C[n:, m:]`` is zero;
    * every other entry is ``inf``.

    Node identifiers are used directly as matrix indices.

    Args:
        G1: first graph (networkx ``Graph``-like interface).
        G2: second graph (networkx ``Graph``-like interface).
        cf: cost function providing ``cns``, ``cnd`` and ``cni``.

    Returns:
        The cost matrix as a numpy array.
    """
    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    size = n + m

    C = np.full((size, size), np.inf)
    C[n:, m:] = 0

    # Materialise the G2 nodes once: they are walked for every node of G1.
    nodes_2 = list(G2.nodes())

    for u in G1.nodes():
        for v in nodes_2:
            C[u, v] = cf.cns(u, v, G1, G2)
        C[u, m + u] = cf.cnd(u, G1)

    for v in nodes_2:
        C[n + v, v] = cf.cni(v, G2)

    return C
def getOptimalMapping(C):
    """Solve the linear sum assignment problem described by cost matrix C.

    Returns a pair ``(rho, varrho)``: ``rho`` is the array of column
    indices returned by ``linear_sum_assignment`` and ``varrho`` is the
    array of row indices reordered by increasing assigned column.

    TODO: include Seb's C programs.
    """
    rows, cols = linear_sum_assignment(C)
    order_by_column = np.argsort(cols)
    varrho = rows[order_by_column]
    return cols, varrho
@@ -0,0 +1,133 @@ | |||||
import numpy as np | |||||
from scipy.optimize import linear_sum_assignment | |||||
class BasicCostFunction:
    """Constant edit costs based on the 'label' attribute of nodes and edges.

    Insertions and deletions share the same cost: ``cni`` is used for both
    node insertion and node deletion, ``cei`` for both edge insertion and
    edge deletion.  Substitutions cost ``cns`` / ``ces`` when the labels
    differ and nothing when they are equal.
    """

    def __init__(self, cns, cni, ces, cei):
        """Store the node (cns, cni) and edge (ces, cei) costs."""
        self.cns_ = cns
        self.cni_ = self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = self.ced_ = cei

    @staticmethod
    def _node_label(G, u):
        """Return the 'label' attribute of node u of graph G."""
        # Older networkx releases expose the attribute mapping as ``G.node``;
        # fall back to ``G.nodes`` when that attribute does not exist.
        node_data = getattr(G, 'node', None)
        if node_data is None:
            node_data = G.nodes
        return node_data[u]['label']

    def cns(self, u, v, G1, G2):
        """Cost of substituting node u of G1 by node v of G2."""
        differ = self._node_label(G1, u) != self._node_label(G2, v)
        return differ * self.cns_

    def cnd(self, u, G1):
        """Cost of deleting node u of G1 (constant)."""
        return self.cnd_

    def cni(self, v, G2):
        """Cost of inserting node v of G2 (constant)."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Cost of substituting edge e1 of G1 by edge e2 of G2.

        Edges are ``(source, target, attributes)`` triples.

        TODO: test with non-symbolic attributes by relying on ``__eq__``.
        """
        return (e1[2]['label'] != e2[2]['label']) * self.ces_

    def ced(self, e1, G1):
        """Cost of deleting edge e1 of G1 (constant)."""
        return self.ced_

    def cei(self, e2, G2):
        """Cost of inserting edge e2 of G2 (constant)."""
        return self.cei_
class RiesenCostFunction(BasicCostFunction):
    """Node costs that also account for the edges incident to each node.

    Node substitution adds the cost of an optimal assignment between the
    incident edges of both nodes; node deletion and insertion add the cost
    of removing or inserting every incident edge.
    """

    def __init__(self, cf):
        """Copy the elementary costs of the cost function cf."""
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Substitution cost of node u of G1 by node v of G2.

        u and v are node ids.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # Assignment matrix between the incident edges of u and of v.
        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        # Edge substitutions.
        for row, nbr_u in enumerate(nbrs_u):
            edge_u = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(G2[v]):
                edge_v = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = self.ces(edge_u, edge_v, G1, G2)

        # Edge deletions.
        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m + row] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)

        # Edge insertions.
        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n + col, col] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = linear_sum_assignment(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return BasicCostFunction.cns(self, u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Deletion cost of node u of G1, incident edges included."""
        edges_cost = sum(
            BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1)
            for nbr in G1[u])
        return BasicCostFunction.cnd(self, u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of node v of G2, incident edges included."""
        edges_cost = sum(
            BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2)
            for nbr in G2[v])
        return BasicCostFunction.cni(self, v, G2) + edges_cost
class NeighboorhoodCostFunction(BasicCostFunction):
    """Node costs that account for incident edges and neighbouring nodes.

    Node substitution adds the cost of an optimal assignment between the
    neighbourhoods of both nodes, where each entry combines the edge cost
    and the basic cost of the neighbour node.  Node deletion and insertion
    add the cost of removing or inserting every incident edge.
    """

    def __init__(self, cf):
        """Copy the elementary costs of the cost function cf."""
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Substitution cost of node u of G1 by node v of G2.

        u and v are node ids.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # Assignment matrix between the neighbourhoods of u and of v.
        sub_C = np.full((n + m, n + m), np.inf)
        sub_C[n:, m:] = 0

        # Substitution of an (edge, neighbour) pair by another one.
        for row, nbr_u in enumerate(nbrs_u):
            edge_u = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(G2[v]):
                edge_v = [v, nbr_v, G2[v][nbr_v]]
                edge_cost = self.ces(edge_u, edge_v, G1, G2)
                node_cost = BasicCostFunction.cns(self, nbr_u, nbr_v, G1, G2)
                sub_C[row, col] = edge_cost + node_cost

        # Deletion of an (edge, neighbour) pair.
        for row, nbr_u in enumerate(nbrs_u):
            edge_cost = self.ced([u, nbr_u, G1[u][nbr_u]], G1)
            node_cost = BasicCostFunction.cnd(self, nbr_u, G1)
            sub_C[row, m + row] = edge_cost + node_cost

        # Insertion of an (edge, neighbour) pair.
        for col, nbr_v in enumerate(nbrs_v):
            edge_cost = self.cei([v, nbr_v, G2[v][nbr_v]], G2)
            node_cost = BasicCostFunction.cni(self, nbr_v, G2)
            sub_C[n + col, col] = edge_cost + node_cost

        rows, cols = linear_sum_assignment(sub_C)
        neighbourhood_cost = np.sum(sub_C[rows, cols])
        return BasicCostFunction.cns(self, u, v, G1, G2) + neighbourhood_cost

    def cnd(self, u, G1):
        """Deletion cost of node u of G1, incident edges included."""
        edges_cost = sum(
            BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1)
            for nbr in G1[u])
        return BasicCostFunction.cnd(self, u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of node v of G2, incident edges included."""
        edges_cost = sum(
            BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2)
            for nbr in G2[v])
        return BasicCostFunction.cni(self, v, G2) + edges_cost
@@ -0,0 +1,3 @@ | |||||
import sys | |||||
import pathlib | |||||
sys.path.insert(0, "../") |
@@ -0,0 +1,74 @@ | |||||
import networkx as nx | |||||
def loadCT(filename):
    """Load the graph stored in a CT file.

    The first line of the file is used as the graph name and the second
    one holds the number of nodes followed by the number of edges.  Then
    comes one line per node, whose fourth field is stored as the node
    'label', and one line per edge, whose first two fields are the 1-based
    endpoints and whose fourth field is stored as the integer edge 'label'.
    Nodes are numbered from 0 in file order.

    Args:
        filename: path of the CT file.

    Returns:
        A networkx ``Graph``.
    """
    # Close the file as soon as its content has been read.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))

    # split() without argument copes with leading blanks and with fields
    # separated by several spaces (fixed-width counts such as "  5  4").
    counts = content[1].split()
    nb_nodes = int(counts[0])
    nb_edges = int(counts[1])

    for i in range(nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    first_edge_line = nb_nodes + 2
    for i in range(nb_edges):
        fields = content[first_edge_line + i].split()
        # Endpoints are 1-based in the file, 0-based in the graph.
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1,
                   label=int(fields[3]))
    return G
def loadGXL(filename):
    """Load the graph stored in a GXL file.

    Nodes are numbered from 0 in document order; the original GXL
    identifier is kept in the 'id' node attribute.  The text of the first
    child of the first 'attr' element of each node and edge is stored as
    its 'label'.

    Returns:
        A networkx ``Graph``.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()

    # Maps a GXL node identifier to the integer index used in the graph.
    index_of = {}
    for index, node in enumerate(root.iter('node')):
        label = node.find('attr')[0].text
        gxl_id = node.attrib['id']
        index_of[gxl_id] = index
        G.add_node(index, id=gxl_id, label=label)

    for edge in root.iter('edge'):
        label = edge.find('attr')[0].text
        source = index_of[edge.attrib['from']]
        target = index_of[edge.attrib['to']]
        G.add_edge(source, target, label=label)

    return G
def loadDataset(filename):
    """Load the graphs and targets listed in a dataset file.

    Two formats are recognised from the file extension:

    * ``.ds``: one ``<CT file> <numeric target>`` entry per line; graphs
      are read with ``loadCT`` and targets converted to float.  Blank
      lines are skipped.
    * ``.cxl``: XML file whose ``print`` elements carry ``file`` and
      ``class`` attributes; graphs are read with ``loadGXL`` and the class
      is kept as a string.

    Graph files are looked up relative to the directory of ``filename``.

    Args:
        filename: path of the dataset file.

    Returns:
        Tuple ``(data, y)`` with the list of graphs and the list of
        targets.  Both lists are empty for any other extension.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []

    if extension == "ds":
        # Close the file as soon as its content has been read.
        with open(filename) as ds_file:
            content = ds_file.read().splitlines()
        for line in content:
            fields = line.split()
            if not fields:
                continue
            # join() keeps the path relative when the dataset file has no
            # directory component (dirname is then the empty string).
            data.append(loadCT(join(dirname_dataset, fields[0])))
            y.append(float(fields[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET
        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y
@@ -0,0 +1,10 @@ | |||||
import networkx as nx | |||||
import numpy as np | |||||
def getSPLengths(G1):
    """Return the matrix of shortest-path lengths between the nodes of G1.

    Entry ``[i, j]`` is the number of edges of the shortest path from node
    ``i`` to node ``j`` as computed by ``networkx.shortest_path``.  Node
    identifiers are used directly as matrix indices.  Pairs for which no
    path is reported keep the initial value 0.

    Args:
        G1: graph (networkx ``Graph``-like interface).

    Returns:
        A square numpy array with one row and one column per node.
    """
    # dict() leaves a mapping unchanged and also accepts an iterable of
    # (source, paths) pairs.
    sp = dict(nx.shortest_path(G1))
    nb_nodes = G1.number_of_nodes()
    distances = np.zeros((nb_nodes, nb_nodes))
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # A path lists its nodes, hence one more item than edges.
            distances[i, j] = len(path) - 1
    return distances