Browse Source

First init - may contain some errors...

v0.1
Benoit GAUZERE 7 years ago
parent
commit
91cb5d2ee6
8 changed files with 502 additions and 0 deletions
  1. +74
    -0
      ged/GED.py
  2. +33
    -0
      ged/bipartiteGED.py
  3. +133
    -0
      ged/costfunctions.py
  4. +0
    -0
      kernels/.gitignore
  5. +3
    -0
      notebooks/paths.py
  6. +175
    -0
      notebooks/test_lib.ipynb
  7. +74
    -0
      utils/graphfiles.py
  8. +10
    -0
      utils/utils.py

+ 74
- 0
ged/GED.py View File

@@ -0,0 +1,74 @@
from ged.costfunctions import BasicCostFunction, RiesenCostFunction
from ged.costfunctions import NeighboorhoodCostFunction
from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping


def ged(G1, G2, method='Riesen', rho=None, varrho=None, cf=None):
    """Compute the graph edit distance between G1 and G2 for a node mapping.

    The mapping is encoded by ``rho`` (G1 -> G2) and ``varrho`` (G2 -> G1).
    Node identifiers are used directly as indices into ``rho`` and
    ``varrho``, so the nodes of each graph must be indexed accordingly.
    With ``n`` and ``m`` the node counts of G1 and G2, ``rho[i] >= m`` is
    treated as "node i of G1 is deleted" and ``varrho[j] >= n`` as
    "node j of G2 is inserted".

    Args:
        G1, G2: graphs exposing ``number_of_nodes()``, ``nodes()``,
            ``edges(data=True)`` and adjacency access ``G[u][v]``.
        method: bipartite cost model used when no mapping is supplied:
            'Riesen', 'Neighboorhood' or 'Basic'.
        rho, varrho: optional precomputed mappings. When either is None,
            both are computed from the bipartite cost matrix.
        cf: cost function object providing cns/cnd/cni/ces/ced/cei.
            None selects ``BasicCostFunction(1, 3, 1, 3)``.

    Returns:
        Tuple ``(cost, rho, varrho)``.

    Raises:
        NameError: if a mapping has to be computed and ``method`` is not
            one of the supported names.

    TODO: use dictionaries for the mappings to be more versatile?
    """
    if cf is None:
        # Built per call instead of once at definition time.
        cf = BasicCostFunction(1, 3, 1, 3)

    if rho is None or varrho is None:
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp))

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    total = 0

    # Node costs: every G1 node is either substituted or deleted ...
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            total += cf.cnd(i, G1)
        else:
            total += cf.cns(i, phi_i, G1, G2)
    # ... and every G2 node without a G1 counterpart is inserted.
    for j in G2.nodes():
        if varrho[j] >= n:
            total += cf.cni(j, G2)

    # Edge costs for G1: an edge whose two endpoints are mapped onto
    # adjacent G2 nodes is matched with that G2 edge; otherwise it is
    # deleted.
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if phi_i < m and phi_j < m and phi_j in G2[phi_i]:
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # NOTE(review): the minimum is taken over the three individual
            # costs (not ces versus ced + cei) -- confirm this is intended.
            total += min(cf.ces(e, e2, G1, G2),
                         cf.ced(e, G1), cf.cei(e2, G2))
        else:
            total += cf.ced(e, G1)

    # Edge costs for G2: edges with no matching G1 edge are inserted.
    # Edges that do have a matching G1 edge were already charged above.
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if phi_i < n and phi_j < n:
            if phi_j not in G1[phi_i]:
                total += cf.cei(e, G2)
        else:
            # At least one endpoint is an inserted node, so the edge is
            # inserted as well (insertion cost, not deletion cost).
            total += cf.cei(e, G2)
    return total, rho, varrho


def computeDistanceMatrix(dataset):
    """Placeholder for a pairwise distance computation over ``dataset``.

    Not implemented yet: the argument is ignored and None is returned.
    """
    return None

+ 33
- 0
ged/bipartiteGED.py View File

@@ -0,0 +1,33 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ged.costfunctions import BasicCostFunction


def computeBipartiteCostMatrix(G1, G2, cf=None):
    """Build the square bipartite cost matrix of G1 and G2 under ``cf``.

    With ``n`` and ``m`` the node counts of G1 and G2, the returned
    ``(n + m) x (n + m)`` matrix is laid out as follows:

    * ``C[u, v]`` for ``u < n``, ``v < m``: substitution cost of u by v;
    * ``C[u, m + u]``: deletion cost of node u of G1;
    * ``C[n + v, v]``: insertion cost of node v of G2;
    * ``C[n:, m:]``: zeros;
    * every other entry: ``inf``.

    Node identifiers are used directly as matrix indices.

    Args:
        G1, G2: graphs exposing ``number_of_nodes()`` and ``nodes()``.
        cf: cost function object providing cns/cnd/cni. None selects
            ``BasicCostFunction(1, 3, 1, 3)``.

    Returns:
        The cost matrix as a float numpy array.
    """
    if cf is None:
        # Built per call instead of once at definition time.
        cf = BasicCostFunction(1, 3, 1, 3)

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    nm = n + m
    C = np.full((nm, nm), np.inf)
    C[n:, m:] = 0

    # nodes() is available on both networkx 1.x and 2.x graphs.
    for u in G1.nodes():
        for v in G2.nodes():
            C[u, v] = cf.cns(u, v, G1, G2)

    for u in G1.nodes():
        C[u, m + u] = cf.cnd(u, G1)

    for v in G2.nodes():
        C[n + v, v] = cf.cni(v, G2)
    return C


def getOptimalMapping(C):
    """Solve the linear assignment problem described by cost matrix C.

    Returns a pair ``(forward, backward)``: ``forward`` is the column
    assigned to each row, and ``backward`` holds the assigned row indices
    reordered by increasing column index.

    TODO: include Seb's C programs.
    """
    rows, cols = linear_sum_assignment(C)
    by_column = np.argsort(cols)
    backward = rows[by_column]
    return cols, backward

+ 133
- 0
ged/costfunctions.py View File

@@ -0,0 +1,133 @@
import numpy as np
from scipy.optimize import linear_sum_assignment


class BasicCostFunction:
    """Constant edit costs driven by equality of the 'label' attribute.

    Node insertion and deletion share one cost, as do edge insertion and
    deletion. Substitutions cost nothing when the labels are equal.
    """

    def __init__(self, cns, cni, ces, cei):
        self.cns_ = cns
        self.cni_ = cni
        self.cnd_ = cni
        self.ces_ = ces
        self.cei_ = cei
        self.ced_ = cei

    def cns(self, u, v, G1, G2):
        """Cost of substituting node u of G1 by node v of G2."""
        labels_differ = G1.node[u]['label'] != G2.node[v]['label']
        return labels_differ * self.cns_

    def cnd(self, u, G1):
        """Cost of deleting node u of G1 (constant)."""
        return self.cnd_

    def cni(self, v, G2):
        """Cost of inserting node v of G2 (constant)."""
        return self.cni_

    def ces(self, e1, e2, G1, G2):
        """Cost of substituting edge e1 by edge e2.

        Each edge is a sequence whose third item is the attribute dict.
        TODO: test with non-symbolic attributes by relying on __eq__.
        """
        labels_differ = e1[2]['label'] != e2[2]['label']
        return labels_differ * self.ces_

    def ced(self, e1, G1):
        """Cost of deleting edge e1 of G1 (constant)."""
        return self.ced_

    def cei(self, e2, G2):
        """Cost of inserting edge e2 of G2 (constant)."""
        return self.cei_


class RiesenCostFunction(BasicCostFunction):
    """Node costs that also account for the edges incident to each node.

    The elementary costs are copied from the wrapped cost function ``cf``.
    """

    def __init__(self, cf):
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Substitution cost of u by v plus the optimal assignment cost of
        their incident edges. u and v are node ids."""
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # Square assignment matrix over incident edges: substitutions in
        # the top-left block, deletions and insertions on the diagonals of
        # the off-diagonal blocks, zeros in the bottom-right block.
        sub_C = np.ones([n + m, n + m]) * np.inf
        sub_C[n:, m:] = 0

        for row, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(G2[v]):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = self.ces(e1, e2, G1, G2)

        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m + row] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)

        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n + col, col] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)

        rows, cols = linear_sum_assignment(sub_C)
        edges_cost = np.sum(sub_C[rows, cols])
        return BasicCostFunction.cns(self, u, v, G1, G2) + edges_cost

    def cnd(self, u, G1):
        """Deletion cost of u plus the deletion cost of its incident edges."""
        edges_cost = sum(
            BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1)
            for nbr in G1[u])
        return BasicCostFunction.cnd(self, u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of v plus the insertion cost of its incident
        edges."""
        edges_cost = sum(
            BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2)
            for nbr in G2[v])
        return BasicCostFunction.cni(self, v, G2) + edges_cost


class NeighboorhoodCostFunction(BasicCostFunction):
    """Node costs that account for incident edges and neighbouring nodes.

    The elementary costs are copied from the wrapped cost function ``cf``.
    """

    def __init__(self, cf):
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Substitution cost of u by v plus the optimal assignment cost of
        their (edge, neighbour) pairs. u and v are node ids."""
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # Same block layout as a bipartite cost matrix, but each entry
        # adds the neighbour node cost to the incident edge cost.
        sub_C = np.ones([n + m, n + m]) * np.inf
        sub_C[n:, m:] = 0

        for row, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, G1[u][nbr_u]]
            for col, nbr_v in enumerate(G2[v]):
                e2 = [v, nbr_v, G2[v][nbr_v]]
                sub_C[row, col] = self.ces(e1, e2, G1, G2)
                sub_C[row, col] += BasicCostFunction.cns(
                    self, nbr_u, nbr_v, G1, G2)

        for row, nbr_u in enumerate(nbrs_u):
            sub_C[row, m + row] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)
            sub_C[row, m + row] += BasicCostFunction.cnd(self, nbr_u, G1)

        for col, nbr_v in enumerate(nbrs_v):
            sub_C[n + col, col] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)
            sub_C[n + col, col] += BasicCostFunction.cni(self, nbr_v, G2)

        rows, cols = linear_sum_assignment(sub_C)
        neighbourhood_cost = np.sum(sub_C[rows, cols])
        return BasicCostFunction.cns(self, u, v, G1, G2) + neighbourhood_cost

    def cnd(self, u, G1):
        """Deletion cost of u plus the deletion cost of its incident edges."""
        edges_cost = sum(
            BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1)
            for nbr in G1[u])
        return BasicCostFunction.cnd(self, u, G1) + edges_cost

    def cni(self, v, G2):
        """Insertion cost of v plus the insertion cost of its incident
        edges."""
        edges_cost = sum(
            BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2)
            for nbr in G2[v])
        return BasicCostFunction.cni(self, v, G2) + edges_cost

+ 0
- 0
kernels/.gitignore View File


+ 3
- 0
notebooks/paths.py View File

@@ -0,0 +1,3 @@
# Import-path setup: importing this module has the side effect of
# modifying sys.path.
import sys
import pathlib
# Prepend the parent directory (relative to the current working directory)
# to the module search path -- presumably so notebooks can import the
# top-level packages of the repository; verify against the notebooks.
sys.path.insert(0, "../")

+ 175
- 0
notebooks/test_lib.ipynb
File diff suppressed because it is too large
View File


+ 74
- 0
utils/graphfiles.py View File

@@ -0,0 +1,74 @@
import networkx as nx


def loadCT(filename):
    """Load a graph from a ``.ct`` text file into a networkx Graph.

    Layout read by this parser:

    * line 0: graph name;
    * line 1: node count then edge count (whitespace separated);
    * next ``nb_nodes`` lines: one node each, the 4th field is its label
      (kept as a string); nodes are numbered from 0 in file order;
    * next ``nb_edges`` lines: one edge each, fields 0 and 1 are the
      1-based endpoint numbers and field 3 is the integer edge label.

    Args:
        filename: path of the file to read.

    Returns:
        A ``networkx.Graph`` with 'label' attributes on nodes and edges.
    """
    # Context manager so the file handle is closed deterministically.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))

    # split() without argument ignores leading and repeated whitespace,
    # whatever the padding of the counts line.
    counts = content[1].split()
    nb_nodes = int(counts[0])
    nb_edges = int(counts[1])

    for i in range(nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    first_edge_line = nb_nodes + 2
    for i in range(nb_edges):
        fields = content[first_edge_line + i].split()
        # Endpoints are 1-based in the file, 0-based in the graph.
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1,
                   label=int(fields[3]))
    return G


def loadGXL(filename):
    """Load a graph from a GXL (XML) file into a networkx Graph.

    Nodes are renumbered from 0 in document order; the original GXL id is
    kept in the 'id' node attribute. Node and edge labels are the text of
    the first child of the element's first 'attr' child.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    root = ET.parse(filename).getroot()
    G = nx.Graph()
    index_of = {}  # GXL node id -> integer node index

    for position, node in enumerate(root.iter('node')):
        node_label = node.find('attr')[0].text
        gxl_id = node.attrib['id']
        index_of[gxl_id] = position
        G.add_node(position, id=gxl_id, label=node_label)

    for edge in root.iter('edge'):
        edge_label = edge.find('attr')[0].text
        source = index_of[edge.attrib['from']]
        target = index_of[edge.attrib['to']]
        G.add_edge(source, target, label=edge_label)
    return G


def loadDataset(filename):
    """Load a graph dataset described by an index file.

    Two index formats are recognised from the file extension:

    * ``.ds``: text file, one ``<graph file> <target>`` pair per line;
      graphs are read with ``loadCT`` and targets converted to float;
    * ``.cxl``: XML file whose 'print' elements carry 'file' and 'class'
      attributes; graphs are read with ``loadGXL`` and classes kept as
      strings.

    Graph file names are resolved relative to the directory of the index
    file. Any other extension yields two empty lists.

    Args:
        filename: path of the index file.

    Returns:
        Tuple ``(data, y)``: the list of graphs and the list of targets.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        # Context manager so the file handle is closed deterministically.
        with open(filename) as ds_file:
            lines = ds_file.read().splitlines()
        for line in lines:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline block).
                continue
            # join() keeps the path relative when the index file has no
            # directory part, instead of producing "/<name>".
            data.append(loadCT(join(dirname_dataset, fields[0])))
            y.append(float(fields[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET

        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y

+ 10
- 0
utils/utils.py View File

@@ -0,0 +1,10 @@
import networkx as nx
import numpy as np


def getSPLengths(G1):
    """Return the matrix of shortest-path lengths (in edges) of G1.

    Node identifiers are used directly as matrix indices. Entries for
    pairs with no connecting path are left at 0.

    Args:
        G1: a networkx graph.

    Returns:
        A ``(nb_nodes, nb_nodes)`` float numpy array whose entry
        ``[i, j]`` is the number of edges on a shortest path from i to j.
    """
    # dict() accepts both a dict of dicts and an iterator of
    # (source, paths) pairs, whichever nx.shortest_path returns.
    sp = dict(nx.shortest_path(G1))
    nb_nodes = G1.number_of_nodes()
    distances = np.zeros((nb_nodes, nb_nodes))
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # A path is a list of nodes, so it has len(path) - 1 edges.
            distances[i, j] = len(path) - 1
    return distances

Loading…
Cancel
Save