def ged(G1, G2, method='Riesen', rho=None, varrho=None, cf=None):
    """Compute the cost of the edit path between G1 and G2 induced by a mapping.

    When ``rho`` or ``varrho`` is not supplied, a node mapping is first
    computed by solving the bipartite assignment problem whose cost matrix is
    built with the cost function selected by ``method``.  The edit path itself
    is always priced with ``cf``.

    Nodes are used directly as indices into ``rho`` and ``varrho``, so the
    graphs' nodes must be the integer indices those mappings are built on.
    NB: using dictionaries for the mappings would be more versatile.

    Parameters
    ----------
    G1, G2 : networkx-style graphs
        Must provide ``number_of_nodes()``, ``nodes()``, ``edges(data=True)``
        and ``G[u]`` adjacency lookup.
    method : {'Riesen', 'Neighboorhood', 'Basic'}
        Cost function used to build the assignment problem; only consulted
        when the mapping has to be computed.
    rho : sequence, optional
        Forward mapping; ``rho[i] >= m`` (m = number of nodes of G2) means
        node ``i`` of G1 is deleted.
    varrho : sequence, optional
        Reverse mapping; ``varrho[j] >= n`` (n = number of nodes of G1) means
        node ``j`` of G2 is inserted.
    cf : cost function object, optional
        Must provide ``cns``, ``cnd``, ``cni``, ``ces``, ``ced`` and ``cei``.
        Defaults to ``BasicCostFunction(1, 3, 1, 3)``, created on each call.

    Returns
    -------
    tuple
        ``(cost, rho, varrho)``.

    Raises
    ------
    NameError
        If the mapping must be computed and ``method`` is not a known name.
    """
    if cf is None:
        # Built lazily so the default is not instantiated at import time.
        cf = BasicCostFunction(1, 3, 1, 3)

    if rho is None or varrho is None:
        if method == 'Riesen':
            cf_bp = RiesenCostFunction(cf)
        elif method == 'Neighboorhood':
            cf_bp = NeighboorhoodCostFunction(cf)
        elif method == 'Basic':
            cf_bp = cf
        else:
            raise NameError('Non existent method ')

        rho, varrho = getOptimalMapping(
            computeBipartiteCostMatrix(G1, G2, cf_bp))

    n = G1.number_of_nodes()
    m = G2.number_of_nodes()
    total = 0

    # Node operations: every node of G1 is either deleted or substituted ...
    for i in G1.nodes():
        phi_i = rho[i]
        if phi_i >= m:
            total += cf.cnd(i, G1)
        else:
            total += cf.cns(i, phi_i, G1, G2)
    # ... and every node of G2 without a pre-image in G1 is inserted.
    for j in G2.nodes():
        if varrho[j] >= n:
            total += cf.cni(j, G2)

    # Edges of G1: matched against the image edge when both endpoints are
    # mapped and that edge exists in G2, deleted otherwise.
    for e in G1.edges(data=True):
        phi_i = rho[e[0]]
        phi_j = rho[e[1]]
        if phi_i < m and phi_j < m and phi_j in G2[phi_i]:
            e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
            # NOTE(review): this takes the cheapest single operation; a
            # delete-then-insert replacement would cost ced + cei. Confirm
            # that min(ces, ced, cei) is the intended pricing.
            total += min(cf.ces(e, e2, G1, G2),
                         cf.ced(e, G1), cf.cei(e2, G2))
        else:
            total += cf.ced(e, G1)

    # Edges of G2: an edge whose pre-image exists in G1 was already priced in
    # the loop above; every other edge has to be inserted.
    for e in G2.edges(data=True):
        phi_i = varrho[e[0]]
        phi_j = varrho[e[1]]
        if phi_i < n and phi_j < n:
            if phi_j not in G1[phi_i]:
                total += cf.cei(e, G2)
        else:
            # An endpoint is an inserted node, so the edge is an insertion
            # (the original charged the deletion cost here).
            total += cf.cei(e, G2)
    return total, rho, varrho
class NeighboorhoodCostFunction(BasicCostFunction):
    """Node costs that also account for the direct neighbourhood of each node.

    The elementary costs are copied from an existing cost function ``cf``.
    Edge costs (``ces``, ``ced``, ``cei``) are inherited unchanged; node costs
    add the cost of editing the incident edges and, for substitutions, the
    adjacent nodes as well.
    """

    def __init__(self, cf):
        """Copy the elementary costs ``cns_``, ``cni_``, ``ces_``, ``cei_`` of ``cf``."""
        BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)

    def cns(self, u, v, G1, G2):
        """Return the cost of substituting node ``u`` of G1 by node ``v`` of G2.

        ``u`` and ``v`` are node ids.  The result is the basic substitution
        cost plus the optimal cost of assigning the neighbourhood of ``u``
        (incident edge + adjacent node) to the neighbourhood of ``v``.
        """
        nbrs_u = G1[u]
        nbrs_v = G2[v]
        n = len(nbrs_u)
        m = len(nbrs_v)

        # (n+m) x (n+m) assignment matrix:
        #   [:n, :m]  substitution of a neighbour of u by a neighbour of v
        #   [:n, m:]  deletions, finite on the diagonal only
        #   [n:, :m]  insertions, finite on the diagonal only
        #   [n:, m:]  zeros (nothing assigned to nothing)
        sub_C = np.ones([n+m, n+m]) * np.inf
        sub_C[n:, m:] = 0

        for i, nbr_u in enumerate(nbrs_u):
            e1 = [u, nbr_u, nbrs_u[nbr_u]]
            for j, nbr_v in enumerate(nbrs_v):
                e2 = [v, nbr_v, nbrs_v[nbr_v]]
                sub_C[i, j] = self.ces(e1, e2, G1, G2)
                sub_C[i, j] += BasicCostFunction.cns(self,
                                                     nbr_u, nbr_v, G1, G2)

        for i, nbr_u in enumerate(nbrs_u):
            sub_C[i, m+i] = self.ced([u, nbr_u, nbrs_u[nbr_u]], G1)
            sub_C[i, m+i] += BasicCostFunction.cnd(self, nbr_u, G1)

        for j, nbr_v in enumerate(nbrs_v):
            sub_C[n+j, j] = self.cei([v, nbr_v, nbrs_v[nbr_v]], G2)
            sub_C[n+j, j] += BasicCostFunction.cni(self, nbr_v, G2)

        row_ind, col_ind = linear_sum_assignment(sub_C)
        cost = np.sum(sub_C[row_ind, col_ind])
        return BasicCostFunction.cns(self, u, v, G1, G2) + cost

    def cnd(self, u, G1):
        """Return the cost of deleting ``u`` together with its incident edges."""
        incident = G1[u]
        cost = sum(BasicCostFunction.ced(self, [u, nbr, incident[nbr]], G1)
                   for nbr in incident)
        return BasicCostFunction.cnd(self, u, G1) + cost

    def cni(self, v, G2):
        """Return the cost of inserting ``v`` together with its incident edges."""
        incident = G2[v]
        cost = sum(BasicCostFunction.cei(self, [v, nbr, incident[nbr]], G2)
                   for nbr in incident)
        return BasicCostFunction.cni(self, v, G2) + cost
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt8VNW99/HPL9BIohKpwOOFq6dclALaBqqvB60Va+Um\nhVgOnKKlx8oxIE89CtUAcrdQudTKJYrnqUFqq9ZE4FQQL209lIImKAWBogHtIcIxqcV4ISCXdf7Y\nEwzJJDNJZrJndr7v1ysvM3uv2fPbzuTLmrX3Xtucc4iISLCk+F2AiIjEnsJdRCSAFO4iIgGkcBcR\nCSCFu4hIACncRUQCSOEuIhJACncRkQBSuIuIBFBLv164bdu2rkuXLn69vIhIUtq2bdvfnXPtIrXz\nLdy7dOlCUVGRXy8vIpKUzOxv0bTTsIyISAAp3EVEAkjhLiISQAp3EZEAUriLiASQwl1EJIAU7iIi\nAaRwFxEJIN8uYhI5Q2kp5OXBjh1QXg4ZGdCnD/zwh9Au4sV4IlKNwl38VVgI8+fDhg3e46NHv1hX\nUAAzZ8KgQZCTA/36+VOjSBLSsIz4JzcXrr0W1qzxQr1qsANUVHjL1qzx2uXm+lGlSFJSz138kZsL\nkyfDkSOR2zrntZs82XucnR3f2kQCQD13aXqFhTWCfRmQCZwFjKvteZUBrwnnRCJSuEvTmz/fG3Kp\n4iJgOvCvkZ5bUeE9X0TqFDHczeyXZlZqZm/Vst7M7GEzKzazHWb2tdiXKYFRWuodPHXujMUjge8C\n50d6vnOwfj2UlcWpQJFgiKbnngfcWMf6QUC30M94QEe9pHZ5eY3fhllstiMSYBHD3Tn3X8A/6mgy\nHHjCebYC55nZhbEqUAJmx46aZ8XUV0UF7NwZm3pEAioWY+4XAweqPC4JLavBzMabWZGZFZXpa3Xz\nVF4em+0cPhyb7YgEVCzC3cIsc2GW4Zxb6ZzLdM5lttNVh81TRkZsttOmTWy2IxJQsQj3EqBjlccd\ngIMx2K4EUZ8+0KpVjcUngKPAydDP0dCysNLSoHfveFUoEgixCPd1wK2hs2auBMqdc4disF0JonHj\nwi6eB6QBC4BfhX6fV9s2nKt1OyLiieZUyN8AW4AeZlZiZreZ2R1mdkeoyXpgP1AMPAZMiFu1kvza\nt/fmirEzR/Nm4Y3lVf2ZFe75ZjB4sCYTE4kg4vQDzrkxEdY7YGLMKpLgy8mBjRujm3qgurQ07/ki\nUiddoSpNr18/WLQI0tPr97z0dO95mZnxqUskQBTu4o/s7C8C3sKdcPWFk4BLS/Paa9Iwkago3MU/\n2dnw6qswYoR3Bk1a2pnr09KgVSt2XnIJ82+8UcEuUg+a8lf8lZkJ+fneXDF5ed6Vp4cPe+ex9+4N\n48bRNTWVZZdeynVbt3LllVf6XbFIUlC4S2Jo1w6mTAm7KgNYtGgR2dnZFBYW0rKlPrYikWhYRpLC\nmDFj+PKXv8yKFSv8LkUkKSjcJSmYGcuXL2fu3LkcPKgLoEUiUbhL0ujZsyfjx4/nnnvu8bsUkYSn\ncJekMm3aNLZu3cpLL73kdykiCU3hLkklPT2dpUuXMnHiRI4dO+Z3OSIJS+EuSWfo0KH06tWLBx98\n0O9SRBKWwl2S0i9+8Qt+8YtfsG/fPr9LEUlICndJSp06deInP/kJkyZNwrmw94YRadYU7pK07rrr\nLv72t7/x3HPP+V2KSMJRuEvSSk1NJTc3l7vuuotPP/3U73JEEorCXZLaNddcw7e+9S1mz57tdyki\nCUXhLklv4cKFrFq1ip07d/pdikjCULhL0mvfvj1z5swhOzubU6dO+V2OSEJQuEsg3H777Rw/fpxV\nq1b5XYpIQlC4SyC0aNGC3NxccnJy+PDDD/0uR8R3CncJjK997WuMGjWKHN1AW0ThLsEyd+5cnn/+\nebZu3ep3KSK+UrhLoGRkZLBo0SLuuOMOTpw44Xc5Ir5RuEvgj
B49mrZt27J8+XK/SxHxjcJdAqfy\nrk3z5s3TXZuk2VK4SyD16NGDf/u3f+Puu+/2uxQRXyjcJbCmTZvG66+/rrs2SbMUVbib2Y1mttfM\nis3svjDrO5nZH8zsTTPbYWaDY1+qSP2kpaWxdOlSJkyYwNGjR/0uR6RJRQx3M2sBLAcGAZcBY8zs\nsmrNpgPPOOeuAEYDK2JdqEhDDBkyhN69e+uuTdLsRNNz7w8UO+f2O+c+B54Chldr44DWod8zAB3F\nkoTx0EMP8fDDD+uuTdKsRBPuFwMHqjwuCS2rahYw1sxKgPXApHAbMrPxZlZkZkVlZWUNKFek/jp1\n6sS9997LnXfeqbs2SbMRTbhbmGXV/0LGAHnOuQ7AYGC1mdXYtnNupXMu0zmX2a5du/pXK9JAd911\nFwcOHKCgoMDvUkSaRDThXgJ0rPK4AzWHXW4DngFwzm0BWgFtY1GgSCx86UtfOn3Xpk8++cTvckTi\nLppwLwS6mVlXM0vFO2C6rlqb/wYGApjZpXjhrnEXSShXX301AwcO1F2bpFmIGO7OuRPAncBGYA/e\nWTG7zGyOmd0UanYPcLuZ/QX4DTDOaXBTEtDChQt54okn2LFjh9+liMSV+ZXBmZmZrqioyJfXlubt\n0Ucf5YknnmDTpk2kpOg6PkkuZrbNOZcZqZ0+2dLs3H777Zw4cYK8vDy/SxGJG4W7NDspKSk88sgj\numuTBJrCXZqlK664gtGjR3PffTVm0xAJBIW7NFtz5sxh/fr1bNmyxe9SRGJO4S7NVkZGBosXLyY7\nO1t3bZLAUbhLs/bP//zPtG3blmXLlvldikhMKdylWTMzVqxYwbx583j//ff9LkckZhTu0ux1796d\n7Oxs3bVJAkXhLgJMnTqVwsJCXnzxRb9LEYkJhbsI3l2bli1bxsSJE3XXJgkEhbtIyODBg+nTpw8/\n+9nP/C5FpNEU7iJVPPTQQyxdupTi4mK/SxFpFIW7SBUdO3bkvvvu012bJOkp3EWq+fGPf8z7779P\nfn6+36WINJjCXaSayrs2/fu//7vu2iRJS+EuEsaAAQO4/vrrmTVrlt+liDSIwl2kFg8++CCrV6/W\nXZskKSncRWrRrl075s2bR3Z2NqdOnfK7HJF6UbiL1OFHP/oRJ0+e5PHHH/e7FJF6UbiL1CElJYXc\n3FymTp3K3//+d7/LEYmawl0kgiuuuIIxY8bork2SVBTuIlGYM2cOL7zwAn/+85/9LkUkKgp3kSi0\nbt1ad22SpKJwF4nSqFGjaN++PUuXLvW7FJGIFO4iUTIzli9fzgMPPEBJSYnf5YjUSeEuUg/du3dn\nwoQJumuTJLyowt3MbjSzvWZWbGZhTxkws1FmttvMdpnZr2NbpkjiyMnJYdu2bWzcuNHvUkRqFTHc\nzawFsBwYBFwGjDGzy6q16QbkAP/XOdcLuCsOtYokBN21SZJBND33/kCxc26/c+5z4ClgeLU2twPL\nnXOHAZxzpbEtUySxDBo0iL59+7JgwQK/SxEJK5pwvxg4UOVxSWhZVd2B7ma22cy2mtmNsSpQJFE9\n9NBDLFu2jHfeecfvUkRqiCbcLcyy6reoaQl0A64FxgD/YWbn1diQ2XgzKzKzorKysvrWKpJQdNcm\nSWTRhHsJ0LHK4w7AwTBt1jrnjjvn3gX24oX9GZxzK51zmc65zHbt2jW0ZpGE8eMf/5iDBw/y7LPP\n+l2KyBmiCfdCoJuZdTWzVGA0sK5amzXAtwDMrC3eMM3+WBYqkoiq3rXp448/9rsckdNaRmrgnDth\nZncCG4EWwC+dc7vMbA5Q5JxbF1p3g5ntBk4CU5xzH8azcJFEMWDAAG644QZmzZrFkiVL/C5H4q20\nFPLyYMcOKC+HjAzo0wd++ENIoBEJ82usMDMz0xUVFfny2iKxVlZWxle/+lVefPFF+vbtmzQBIPVQ\nWAjz58OGDd7jqqfBpqWBc
zBoEOTkQL9+cSvDzLY55zIjtlO4i8TGY489xtalS/mPf/on7IUXvIU+\nBYDEWG4uTJ4MFRXee1gbM+99XrQIsrPjUkq04a7pB0Ri5Lbjx1m2axdu7Vov1Ktf4FRR4S1bswau\nvdYLDEl8lcF+5EjdwQ7e+iNHvPY+v78Kd5FYyM0lZcoU0k6dIiUUAGOBC4HWeGcY/Edl2wQKAImg\nsPCLYK/iH8AI4GygM1BjvpXK99fH0QmFu0hj1RIAOcB7wMd4p5dNB7ZVbZAAASARzJ/vfeOqZiKQ\nCnwAPAlkA7uqN6qo8J7vE4W7SGPVEgC9gLNCv1voZ1/1Rj4HgNShtNQ7eFptKOYzIB+YC5wDDABu\nAlZXf75zsH49+HTBpsJdpDFqCYBKE4B0oCfeEM3g6g18DgCpQ15e2MVv450T3r3Ksr6E6bmDd4C1\nlu3EW8Tz3EWkDhH+cFcAS4EtwB/5oid/hsoAmDLljMXOuYg/frRrLq/5jf/8T7qEmfXzUyCj2rIM\n4JNw721FBezcGW5N3CncRRpjx46aZ8VU0wLvq/uvgFzg/1VvUFHB6p/8hB/ce2/YOWrMrM6faNrE\nul1zeM2v7NtHlzDv5zl4x1Gq+hg4t7YPwOHDta2JK4W7SGOUl0fd9ARhxtxDvj9kCP+ydm2N8BEf\njR0LTz5ZY3F3vPfyHb6YQOsveMdYwmrTJh7VRaQxd5HGyKj+Bd1Tinfjg0/x5uPYCPwGuK6WzaR8\n+cu0aNGClJQUBXui6NMHWrWqsfhsYCQwA+/g6mZgLXBLuG2kpUHv3nEssnYKd5HGqCUADG8IpgPQ\nBpgMPETNu9wAvgaA1GHcuFpXrQAqgPZ4c5znUkvP3bk6txNPCneRxqjlD7cd8CrwEd547E6825WF\n5WMASB3at/emigjzTerLeFPhfgb8N/Av4Z5vBoMH+zaXkMJdpDHqCICo+BwAEkFOjvfNqiHS0rzn\n+0ThLtJYSRwAEkG/ft4kYOnp9Xteerr3vMyI83vFjcJdpLGSOAAkCtnZX7y/kb6hmX3xvsZpVsho\nKdxFYiE7m/cmTeII4JIoACRK2dnw6qswYoR3AL36N7W0NG/5iBFeuwR4X3Weu0iM3LF9O/86ZQqj\n9u3zphQwO3POmcr53AcP9oZi1GNPLpmZkJ/vTRWRl+ddeXr4sHcee+/e3kHxBDp2onAXiYFNmzax\nd+9evrtuHaSmJk0ASAO0a1djqohEpHAXaSTnHNOmTWPmzJmkpqZ6C5MkACS4NOYu0kgvvvgipaWl\njB071u9SRE5TuIs0gnOO6dOnM2fOHFq21BdhSRwKd5FGWLNmDcePH+fmm2/2uxSRM6irIdJAJ0+e\n5P7772fBggWkpKifJIlFn0iRBnrqqac499xzGTJkiN+liNSgnrtIAxw/fpyZM2eycuVKTdErCUk9\nd5EGyMvLo3Pnzlx3XW0ztIv4Sz13kXo6evQoc+fO5ZlnnvG7FJFaRdVzN7MbzWyvmRWb2X11tLvZ\nzJyZ6bpqCaxHH32Uvn37cuWVV/pdikitIvbczawFsBz4NlACFJrZOufc7mrtzsW79+9r8ShUJBF8\n+umnzJ8/n40bN/pdikidoum59weKnXP7nXOf490aMtzdwuYCDwJ13wpeJIk9/PDDXHvttfTt29fv\nUkTqFM2Y+8XAgSqPS4BvVG1gZlcAHZ1zvzOzyTGsTyRhfPTRR/z85z/nT3/6k9+liEQUTc893Hle\n7vRKsxTg58A9ETdkNt7MisysqKysLPoqRRLAokWLGDZsGD169PC7FJGIoum5lwAdqzzuABys8vhc\n4KvAH0Pn+14ArDOzm5xzRVU35JxbCawEyMzMdIgkidLSUnJzc9m2bZvfpYhEJZqeeyHQzcy6mlkq\nMBpYV7nSOVfunGvrnOvinOsCbAVqBLtIMluwYAFjxoyhS5cufpciEpWIPXfn3AkzuxPYCLQ
Afumc\n22Vmc4Ai59y6urcgktxKSkrIy8tj165dfpciErWoLmJyzq0H1ldbNqOWttc2viyRxDFv3jx+9KMf\nceGFF/pdikjUdIWqSB3279/Pb3/7W95++22/SxGpF80tI1KHWbNmMWnSJM4//3y/SxGpF/XcRWqx\ne/duXnjhBYqLi/0uRaTe1HMXqcWMGTOYMmUKrVu39rsUkXpTz10kjG3btrFlyxaeeOIJv0sRaRD1\n3EXCmD59OlOnTiU9Pd3vUkQaRD13kWr+9Kc/sWfPHtauXet3KSINpp67SBXOOaZNm8bMmTNJTU31\nuxyRBlO4i1Tx0ksv8cEHH3DLLbf4XYpIoyjcRUIqe+2zZ8+mZUuNWEpyU7iLhKxdu5bjx4/zve99\nz+9SRBpN3RMR4OTJk9x///389Kc/JSVFfR5JfvoUiwBPP/00Z599NkOHDvW7FJGYUM9dmr3jx48z\nc+ZMHnnkEUI3nBFJeuq5S7O3atUqOnbsyMCBA/0uRSRm1HOXZu3YsWPMmTOHp59+2u9SRGJKPXdp\n1h599FH69u3LVVdd5XcpIjGlnrs0W5999hnz589nw4YNfpciEnPquUuz9fDDD3PNNddw+eWX+12K\nSMyp5y7N0kcffcSSJUvYtGmT36WIxIV67tIsLV68mKFDh9KzZ0+/SxGJC/XcpdkpKytjxYoVbNu2\nze9SROJGPXdpdhYsWMDo0aPp0qWL36WIxE3we+6lpZCXBzt2QHk5ZGRAnz7wwx9Cu3Z+VydN7P33\n3+fxxx/nrbfe8rsUkbgy55wvL5yZmemKiori9wKFhTB/PlSe5nb06Bfr0tLAORg0CHJyoF+/+NUh\nCSU7O5tzzjmHhQsX+l2KSIOY2TbnXGakdsHsuefmwuTJUFHhhXh1FRXef9esgY0bYdEiyM5u2hql\nye3fv59nnnmGvXv3+l2KSNwFL9wrg/3IkchtnfPaTZ7sPVbAB9rs2bOZNGkSbdu29bsUkbiL6oCq\nmd1oZnvNrNjM7guz/m4z221mO8zsFTPrHPtSo1BYWCPYjwG3AZ2Bc4ErgBrXI1YGfDyHicRXe/bs\nYcOGDdx9991+lyLSJCKGu5m1AJYDg4DLgDFmdlm1Zm8Cmc65PsCzwIOxLjQq8+d/MeQScgLoCLwK\nlANzgVHAe9WfW1HhPV8CacaMGUyePJnWrVv7XYpIk4im594fKHbO7XfOfQ48BQyv2sA59wfnXGV3\neSvQIbZlRqG01Dt4Wm2M/WxgFtAFb2eHAl2BGmc4Owfr10NZWdxLlab1xhtvsHnzZu68806/SxFp\nMtGE+8XAgSqPS0LLanMbYUY+4i4vL6pmHwBvA73CrTSLejuSPKZPn87UqVNJT0/3uxSRJhPNAdVw\nt6YJe/6kmY0FMoFv1rJ+PDAeoFOnTlGWGKUdO8483TGM48D3gR8AYS86r6iAnTtjW5f4avPmzeze\nvZvnnnvO71JEmlQ0PfcSvGHrSh2Ag9Ubmdn1wDTgJufcsXAbcs6tdM5lOucy28X6AqLy8jpXnwJu\nAVKBZXU1PHw4djWJr5xzTJs2jRkzZnDWWWf5XY5Ik4om3AuBbmbW1cxSgdHAuqoNzOwK4FG8YC+N\nfZlRyMiodZXDGyv6AMgHvlTXdtq0iWlZ4p+XX36ZQ4cOceutt/pdikiTixjuzrkTwJ3ARmAP8Ixz\nbpeZzTGzm0LNFgLnAL81s+1mtq6WzcVPnz7QqlXYVdl4hf8nkFbHJo61aMGbJ0/y8ccfx6FAaUqV\nvfbZs2fTsmXwLucQiSSq89ydc+udc92dc//knHsgtGyGc25d6PfrnXP/xzl3eejnprq3GAfjxoVd\n/De8rxTbgQvw/gU6B3gyTNsWKSksLC2lQ4cODB06lMcff5wPP/wwTgVLPK1bt45jx44xatQov0sR\n8UVwZoVs396bK8bOPP7bGW9Y5ijwaZWf71d/vhkthw3
j1y+9RElJCd///vd5/vnnueSSS7j++utZ\nsWIFhw4daoIdkcY6deoU999/P3PnziUlJTgfcZH6CNYnPyfHmxSsIdLSvOcDrVu3ZsyYMTz77LMc\nOnSIiRMnsmXLFnr16sWAAQNYsmQJ7733Xuzqlph6+umnSU9PZ9iwYX6XIuKbYIV7v37eJGD1PZ85\nPd17XmbNidbS09MZMWIEq1ev5n/+53+YNm0ae/bsoX///nz961/npz/9KX/9619jtAPSWCdOnGDm\nzJk88MADmIU7i1ekeQhWuIM3+VdlwEf64zb7ItijmDQsNTWVQYMG8dhjj3Hw4EEWL17MoUOHGDhw\nIL169WLGjBls374dv6ZRFli1ahUdOnRg4MCBfpci4qvgzudeVOTNFbN+vRfiVeecqZzPffBgbygm\nTI+9Pk6dOsXrr79Ofn4++fn5mBlZWVmMHDmS/v37a9y3iRw7dozu3bvz1FNPcdVVV/ldjkhcRDuf\ne3DDvVJZmTelwM6d3gVKbdpA797e2TVxuBOTc46//OUv5OfnU1BQQHl5OSNGjCArK4urr76aFi1a\nxPw1xbN06VI2btzI7373O79LEYkbhXuC+Otf/0pBQQH5+fkcOHCA4cOHk5WVxXXXXUdqaqrf5QXG\nZ599xle+8hU2bNjA5Zdf7nc5InETbbhrvCDOevbsydSpU9m2bRuvv/46l156KXPnzuWCCy7glltu\n4bnnnuNINDcWkTotXbqUq6++WsEuEqKeu08OHjzImjVrKCgooKioiG9/+9uMHDmSIUOGaM7xevro\no4/o1q0bmzZtomfPsFPCiQSGeu4J7qKLLmLChAm8/PLLFBcXM2jQIJ588kldHdsAS5YsYciQIQp2\nkSrUc08w5eXlrF+/nvz8fF566SX69etHVlYW3/3ud7nwwgv9Li/hlJWV0bNnT4qKiujatavf5YjE\nnQ6oBsCRI0fYuHEj+fn5PP/88/Tq1YusrCxGjBhBly5d/C4vIUyePJkjR46wYsUKv0sRaRIK94D5\n/PPPeeWVVygoKGDt2rV06tSJkSNHkpWVRY8ePfwuzxcHDx6kd+/e7Ny5k4suusjvckSahMI9wE6c\nOMGmTZsoKCigoKCA8847j6ysLLKysujTp0+zuex+woQJnH322SxcuNDvUkSajMK9mTh16hSvvfba\n6XPpU1JSGDlyZOCvjn333XfJzMxk7969tG3b1u9yRJqMwr0Zqnp1bH5+Ph9//PHpoA/a1bHjxo2j\nc+fOzJ492+9SRJqUwl3Ys2fP6aGbIF0du2fPHr75zW/yzjvvkFHH7RVFgkjhLmd49913Twf9nj17\nGDJkCFlZWXznO98hraFz4Ptk1KhRfP3rX+fee+/1uxSRJqdwl1pVXh2bn59PUVERN9xwA1lZWQwe\nPDjhr4598803GTJkCO+88w5nn3223+WINDldoSq1qrw69pVXXmHfvn0MGjSI1atX06FDB4YNG5bQ\nV8dOnz6dnJwcBbtIBOq5y2nl5eU8//zz5Ofn8/LLL9O/f39GjhzJiBEjuOCCC/wujz//+c+MGTOG\nt99+m7POOsvvckR8oWEZaZQjR47wwgsvUFBQcMbVsSNHjqRz585NXo9zjuuuu46xY8dy2223Nfnr\niyQKhbvEzLFjx/j9739Pfn4+a9eupXPnzqeDPuZXx5aWejdX2bEDysshIwP69OHVSy7h9qlT2b17\nNy1btozta4okEYW7xEXl1bH5+fk899xztGnT5vQ0CI26Oraw0Lst4oYN3uOjR0+vcmlpfH70KKWZ\nmXRcvty7EbpIM6Vwl7irvDq28qKpFi1anA76fv36RX91bG4uTJ7s3ee2js+jM8PS0qK+oblIECnc\npUk559i+ffvpaRA++eST0/eOHTBgQO1Xx1YGe33uRpWeroCXZkvhLr6qvDo2Pz+f999/n+HDhzNy\n5Mgzr44tLIRrr60
12N8BegM3A7+qvjI9HV59FTIjfsZFAiWm57mb2Y1mttfMis3svjDrzzKzp0Pr\nXzOzLvUvWYLk0ksvZdq0abzxxhts3bqVHj16nL537K233sqaNWs4OW+eNxRTi4lAraPrFRXeGL2I\nhBUx3M2sBbAcGARcBowxs8uqNbsNOOyc+wrwc+BnsS5UklfXrl2555572Lx5M2+99Rbf+MY3WL14\nMcfXrat1jP0p4DxgYG0bdQ7Wr4eysjhVLZLcoum59weKnXP7nXOf4/3dDa/WZjiwKvT7s8BAay6T\niku9XHTRRUycOJH8YcM4q1WrsG0+BmYAiyNtzMw7bVJEaogm3C8GDlR5XBJaFraNc+4EUA6cH4sC\nJaB27MCqnO5Y1f14XwU7RtpGRQXs3BnjwkSCIZqrQcL1wKt/l46mDWY2HhgP0KlTpyheWgKrvDzs\n4u3Ay8Cb0W7n8OEYFSQSLNGEewlndqI6AAdraVNiZi2BDOAf1TfknFsJrATvbJmGFCwBUcs87H8E\n3gMq/+n/FDgJ7AbeCPeENm1iXppIEEQzLFMIdDOzrmaWCowG1lVrsw74Qej3m4HfO7/OsZTk0KcP\nhBlzHw/sw+vBbwfuAIYAG8NtIy0NeveOY5EiyStiuIfG0O/E+/vaAzzjnNtlZnPM7KZQs/8PnG9m\nxcDdQI3TJUXOMG5c2MXpwAVVfs4BWgHtwjV2rtbtiDR3Uc3A5JxbD6yvtmxGld+PAt+LbWkSaO3b\nw6BBsGZNnVMOzKpthRkMHgztwsa+SLOnm3WIf3JyvKGVhkhL854vImEp3MU//fp5c8Skp9fveZVz\ny2jqAZFaaWJs8Vfl5F9RzAqJmddj16RhIhGp5y7+y872JgEbMcI7g6b6UE1amrd8xAivnYJdJCL1\n3CUxZGZCfr43V0xennfl6eHD3nnsvXt7Z8Xo4KlI1BTukljatYMpU/yuQiTpaVhGRCSAFO4iIgGk\ncBcRCSCFu4hIACncRUQCSOEuIhJACncRkQBSuIuIBJD5dU8NMysD/tbEL9sW+HsTv6YftJ/B0Rz2\nEbSf9dHZORfxcm3fwt0PZlbknAv8VILaz+BoDvsI2s940LCMiEgAKdxFRAKouYX7Sr8LaCLaz+Bo\nDvsI2s+Ya1Zj7iIizUVz67mLiDQLgQx3M7vRzPaaWbGZ3Rdm/Vlm9nRo/Wtm1qXpq2y8KPbzbjPb\nbWY7zOwVM+vsR52NEWkfq7S72cycmSXlGRfR7KeZjQq9n7vM7NdNXWMsRPGZ7WRmfzCzN0Of28F+\n1NkYZvYUS70MAAADKUlEQVRLMys1s7dqWW9m9nDo/8EOM/taXApxzgXqB2gB7AMuAVKBvwCXVWsz\nAXgk9Pto4Gm/647Tfn4LSA/9np1s+xnNPobanQv8F7AVyPS77ji9l92AN4E2ocft/a47Tvu5EsgO\n/X4Z8J7fdTdgP68Bvga8Vcv6wcAGwIArgdfiUUcQe+79gWLn3H7n3OfAU8Dwam2GA6tCvz8LDDQz\na8IaYyHifjrn/uCcOxJ6uBXo0MQ1NlY07yXAXOBB4GhTFhdD0ezn7cBy59xhAOdcaRPXGAvR7KcD\nWod+zwAONmF9MeGc+y/gH3U0GQ484TxbgfPM7MJY1xHEcL8YOFDlcUloWdg2zrkTQDlwfpNUFzvR\n7GdVt+H1FpJJxH00syuAjs653zVlYTEWzXvZHehuZpvNbKuZ3dhk1cVONPs5CxhrZiXAemBS05TW\npOr7t9sgQbyHargeePVTgqJpk+ii3gczGwtkAt+Ma0WxV+c+mlkK8HNgXFMVFCfRvJct8YZmrsX7\nBrbJzL7qnPsozrXFUjT7OQbIc84tNrOrgNWh/TwV//KaTJPkTxB77iVAxyqPO1Dzq93pNmbWEu/r\nX11foxJRNPuJmV0PTANucs4da6LaYiXSPp4LfBX4o5m9hzd+uS4JD6pG+5ld65w77
px7F9iLF/bJ\nJJr9vA14BsA5twVohTcfS5BE9bfbWEEM90Kgm5l1NbNUvAOm66q1WQf8IPT7zcDvXehIRxKJuJ+h\nIYtH8YI9Gcdo69xH51y5c66tc66Lc64L3nGFm5xzRf6U22DRfGbX4B0gx8za4g3T7G/SKhsvmv38\nb2AggJldihfuZU1aZfytA24NnTVzJVDunDsU81fx+8hynI5WDwbexjsyPy20bA7eHz54H5jfAsXA\n68Alftccp/18GfgA2B76Wed3zbHex2pt/0gSni0T5XtpwBJgN7ATGO13zXHaz8uAzXhn0mwHbvC7\n5gbs42+AQ8BxvF76bcAdwB1V3svlof8HO+P1mdUVqiIiARTEYRkRkWZP4S4iEkAKdxGRAFK4i4gE\nkMJdRCSAFO4iIgGkcBcRCSCFu4hIAP0v8ncXPBCruC4AAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 1, 4]\n" + ] + } + ], + "source": [ + "import networkx as nx\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")\n", + "nx.draw_networkx(dataset[12])\n", + "plt.show()\n", + "print(list(dataset[12][3]))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "cf = BasicCostFunction(1,3,1,3)\n", + "N=len(dataset)\n", + "N=10\n", + "ged_distances = np.zeros((N,N))\n", + "for i in range(0,N):\n", + " for j in range(i,N):\n", + " ged_distances[j,i] = ged_distances[i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n", + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n", + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n" + ] + } + ], + "source": [ + "G1=dataset[12]\n", + 
"G2=dataset[12]\n", + "\n", + "print(ged(G1,G2))\n", + "print(ged(G1,G2,method='Basic'))\n", + "print(ged(G1,G2,method='Riesen'))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0: {0: [0], 3: [0, 3], 1: [0, 3, 1], 4: [0, 3, 4], 2: [0, 3, 4, 2]}, 1: {1: [1], 3: [1, 3], 0: [1, 3, 0], 4: [1, 3, 4], 2: [1, 3, 4, 2]}, 2: {2: [2], 4: [2, 4], 3: [2, 4, 3], 0: [2, 4, 3, 0], 1: [2, 4, 3, 1]}, 3: {3: [3], 0: [3, 0], 1: [3, 1], 4: [3, 4], 2: [3, 4, 2]}, 4: {4: [4], 2: [4, 2], 3: [4, 3], 0: [4, 3, 0], 1: [4, 3, 1]}}\n", + "[[ 0. 2. 3. 1. 2.]\n", + " [ 2. 0. 3. 1. 2.]\n", + " [ 3. 3. 0. 2. 1.]\n", + " [ 1. 1. 2. 0. 1.]\n", + " [ 2. 2. 1. 1. 0.]]\n" + ] + } + ], + "source": [ + "l = nx.shortest_path(G1)\n", + "print(l)\n", + "\n", + "distances = np.zeros((G1.number_of_nodes(),G1.number_of_nodes()))\n", + "for i in l.keys():\n", + " for j in l[i].keys():\n", + " distances[i,j] = len(l[i][j])-1\n", + "\n", + "print(distances)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + }, + "name": "test_lib.ipynb" + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/utils/graphfiles.py b/utils/graphfiles.py new file mode 100644 index 0000000..b561f38 --- /dev/null +++ b/utils/graphfiles.py @@ -0,0 +1,74 @@ +import networkx as nx + + +def loadCT(filename): + content = open(filename).read().splitlines() + G = nx.Graph(name=str(content[0])) + tmp = content[1].split(" ") + if tmp[0] == '': + nb_nodes = int(tmp[1]) + nb_edges = int(tmp[2]) + else: + 
def getSPLengths(G1):
    """Return the matrix of shortest-path lengths between all nodes of G1.

    Entry ``[i, j]`` is the number of edges on a shortest path from node
    ``i`` to node ``j``.  Pairs with no connecting path keep the initial
    value 0.  Nodes are used directly as matrix indices, so they must be the
    integers ``0 .. number_of_nodes - 1``.

    Parameters
    ----------
    G1 : networkx graph

    Returns
    -------
    numpy.ndarray
        Square array of shape ``(number_of_nodes, number_of_nodes)``.
    """
    # dict() accepts both a mapping and an iterable of (source, paths) pairs,
    # whichever form nx.shortest_path returns for an all-pairs query.
    sp = dict(nx.shortest_path(G1))
    nb_nodes = G1.number_of_nodes()
    distances = np.zeros((nb_nodes, nb_nodes))
    for i, paths_from_i in sp.items():
        for j, path in paths_from_i.items():
            # A path lists its nodes, so its length in edges is one less.
            distances[i, j] = len(path) - 1
    return distances