diff --git a/notebooks/.ipynb_checkpoints/test_lib-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/test_lib-checkpoint.ipynb new file mode 100644 index 0000000..0b7ae52 --- /dev/null +++ b/notebooks/.ipynb_checkpoints/test_lib-checkpoint.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import paths\n", + "\n", + "from ged.GED import ged\n", + "from utils.graphfiles import loadDataset\n", + "from ged.costfunctions import RiesenCostFunction, BasicCostFunction\n", + "from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt8VNW99/HPL9BIohKpwOOFq6dclALaBqqvB60Va+Um\nhVgOnKKlx8oxIE89CtUAcrdQudTKJYrnqUFqq9ZE4FQQL209lIImKAWBogHtIcIxqcV4ISCXdf7Y\nEwzJJDNJZrJndr7v1ysvM3uv2fPbzuTLmrX3Xtucc4iISLCk+F2AiIjEnsJdRCSAFO4iIgGkcBcR\nCSCFu4hIACncRUQCSOEuIhJACncRkQBSuIuIBFBLv164bdu2rkuXLn69vIhIUtq2bdvfnXPtIrXz\nLdy7dOlCUVGRXy8vIpKUzOxv0bTTsIyISAAp3EVEAkjhLiISQAp3EZEAUriLiASQwl1EJIAU7iIi\nAaRwFxEJIN8uYhI5Q2kp5OXBjh1QXg4ZGdCnD/zwh9Au4sV4IlKNwl38VVgI8+fDhg3e46NHv1hX\nUAAzZ8KgQZCTA/36+VOjSBLSsIz4JzcXrr0W1qzxQr1qsANUVHjL1qzx2uXm+lGlSFJSz138kZsL\nkyfDkSOR2zrntZs82XucnR3f2kQCQD13aXqFhTWCfRmQCZwFjKvteZUBrwnnRCJSuEvTmz/fG3Kp\n4iJgOvCvkZ5bUeE9X0TqFDHczeyXZlZqZm/Vst7M7GEzKzazHWb2tdiXKYFRWuodPHXujMUjge8C\n50d6vnOwfj2UlcWpQJFgiKbnngfcWMf6QUC30M94QEe9pHZ5eY3fhllstiMSYBHD3Tn3X8A/6mgy\nHHjCebYC55nZhbEqUAJmx46aZ8XUV0UF7NwZm3pEAioWY+4XAweqPC4JLavBzMabWZGZFZXpa3Xz\nVF4em+0cPhyb7YgEVCzC3cIsc2GW4Zxb6ZzLdM5lttNVh81TRkZsttOmTWy2IxJQsQj3EqBjlc
cd\ngIMx2K4EUZ8+0KpVjcUngKPAydDP0dCysNLSoHfveFUoEgixCPd1wK2hs2auBMqdc4disF0JonHj\nwi6eB6QBC4BfhX6fV9s2nKt1OyLiieZUyN8AW4AeZlZiZreZ2R1mdkeoyXpgP1AMPAZMiFu1kvza\nt/fmirEzR/Nm4Y3lVf2ZFe75ZjB4sCYTE4kg4vQDzrkxEdY7YGLMKpLgy8mBjRujm3qgurQ07/ki\nUiddoSpNr18/WLQI0tPr97z0dO95mZnxqUskQBTu4o/s7C8C3sKdcPWFk4BLS/Paa9Iwkago3MU/\n2dnw6qswYoR3Bk1a2pnr09KgVSt2XnIJ82+8UcEuUg+a8lf8lZkJ+fneXDF5ed6Vp4cPe+ex9+4N\n48bRNTWVZZdeynVbt3LllVf6XbFIUlC4S2Jo1w6mTAm7KgNYtGgR2dnZFBYW0rKlPrYikWhYRpLC\nmDFj+PKXv8yKFSv8LkUkKSjcJSmYGcuXL2fu3LkcPKgLoEUiUbhL0ujZsyfjx4/nnnvu8bsUkYSn\ncJekMm3aNLZu3cpLL73kdykiCU3hLkklPT2dpUuXMnHiRI4dO+Z3OSIJS+EuSWfo0KH06tWLBx98\n0O9SRBKWwl2S0i9+8Qt+8YtfsG/fPr9LEUlICndJSp06deInP/kJkyZNwrmw94YRadYU7pK07rrr\nLv72t7/x3HPP+V2KSMJRuEvSSk1NJTc3l7vuuotPP/3U73JEEorCXZLaNddcw7e+9S1mz57tdyki\nCUXhLklv4cKFrFq1ip07d/pdikjCULhL0mvfvj1z5swhOzubU6dO+V2OSEJQuEsg3H777Rw/fpxV\nq1b5XYpIQlC4SyC0aNGC3NxccnJy+PDDD/0uR8R3CncJjK997WuMGjWKHN1AW0ThLsEyd+5cnn/+\nebZu3ep3KSK+UrhLoGRkZLBo0SLuuOMOTpw44Xc5Ir5RuEvgjB49mrZt27J8+XK/SxHxjcJdAqfy\nrk3z5s3TXZuk2VK4SyD16NGDf/u3f+Puu+/2uxQRXyjcJbCmTZvG66+/rrs2SbMUVbib2Y1mttfM\nis3svjDrO5nZH8zsTTPbYWaDY1+qSP2kpaWxdOlSJkyYwNGjR/0uR6RJRQx3M2sBLAcGAZcBY8zs\nsmrNpgPPOOeuAEYDK2JdqEhDDBkyhN69e+uuTdLsRNNz7w8UO+f2O+c+B54Chldr44DWod8zAB3F\nkoTx0EMP8fDDD+uuTdKsRBPuFwMHqjwuCS2rahYw1sxKgPXApHAbMrPxZlZkZkVlZWUNKFek/jp1\n6sS9997LnXfeqbs2SbMRTbhbmGXV/0LGAHnOuQ7AYGC1mdXYtnNupXMu0zmX2a5du/pXK9JAd911\nFwcOHKCgoMDvUkSaRDThXgJ0rPK4AzWHXW4DngFwzm0BWgFtY1GgSCx86UtfOn3Xpk8++cTvckTi\nLppwLwS6mVlXM0vFO2C6rlqb/wYGApjZpXjhrnEXSShXX301AwcO1F2bpFmIGO7OuRPAncBGYA/e\nWTG7zGyOmd0UanYPcLuZ/QX4DTDOaXBTEtDChQt54okn2LFjh9+liMSV+ZXBmZmZrqioyJfXlubt\n0Ucf5YknnmDTpk2kpOg6PkkuZrbNOZcZqZ0+2dLs3H777Zw4cYK8vDy/SxGJG4W7NDspKSk88sgj\numuTBJrCXZqlK664gtGjR3PffTVm0xAJBIW7NFtz5sxh/fr1bNmyxe9SRGJO4S7NVkZGBosXLyY7\nO1t3bZLAUbhLs/bP//zPtG3blmXLlvldikhMKdylWTMzVqxYwbx583j//ff9LkckZhTu0ux1796d\n7Oxs3bVJAkXhLgJMnTqVwsJCXnzxRb9LEYkJhbsI3l2bli1bxsSJE3XXJgkEhbtIyODBg+nTpw8/\n+9nP/C5FpNEU7iJVPPTQQyxdupTi4mK/SxFpFIW7SBUdO3
bkvvvu012bJOkp3EWq+fGPf8z7779P\nfn6+36WINJjCXaSayrs2/fu//7vu2iRJS+EuEsaAAQO4/vrrmTVrlt+liDSIwl2kFg8++CCrV6/W\nXZskKSncRWrRrl075s2bR3Z2NqdOnfK7HJF6UbiL1OFHP/oRJ0+e5PHHH/e7FJF6UbiL1CElJYXc\n3FymTp3K3//+d7/LEYmawl0kgiuuuIIxY8bork2SVBTuIlGYM2cOL7zwAn/+85/9LkUkKgp3kSi0\nbt1ad22SpKJwF4nSqFGjaN++PUuXLvW7FJGIFO4iUTIzli9fzgMPPEBJSYnf5YjUSeEuUg/du3dn\nwoQJumuTJLyowt3MbjSzvWZWbGZhTxkws1FmttvMdpnZr2NbpkjiyMnJYdu2bWzcuNHvUkRqFTHc\nzawFsBwYBFwGjDGzy6q16QbkAP/XOdcLuCsOtYokBN21SZJBND33/kCxc26/c+5z4ClgeLU2twPL\nnXOHAZxzpbEtUySxDBo0iL59+7JgwQK/SxEJK5pwvxg4UOVxSWhZVd2B7ma22cy2mtmNsSpQJFE9\n9NBDLFu2jHfeecfvUkRqiCbcLcyy6reoaQl0A64FxgD/YWbn1diQ2XgzKzKzorKysvrWKpJQdNcm\nSWTRhHsJ0LHK4w7AwTBt1jrnjjvn3gX24oX9GZxzK51zmc65zHbt2jW0ZpGE8eMf/5iDBw/y7LPP\n+l2KyBmiCfdCoJuZdTWzVGA0sK5amzXAtwDMrC3eMM3+WBYqkoiq3rXp448/9rsckdNaRmrgnDth\nZncCG4EWwC+dc7vMbA5Q5JxbF1p3g5ntBk4CU5xzH8azcJFEMWDAAG644QZmzZrFkiVL/C5H4q20\nFPLyYMcOKC+HjAzo0wd++ENIoBEJ82usMDMz0xUVFfny2iKxVlZWxle/+lVefPFF+vbtmzQBIPVQ\nWAjz58OGDd7jqqfBpqWBczBoEOTkQL9+cSvDzLY55zIjtlO4i8TGY489xtalS/mPf/on7IUXvIU+\nBYDEWG4uTJ4MFRXee1gbM+99XrQIsrPjUkq04a7pB0Ri5Lbjx1m2axdu7Vov1Ktf4FRR4S1bswau\nvdYLDEl8lcF+5EjdwQ7e+iNHvPY+v78Kd5FYyM0lZcoU0k6dIiUUAGOBC4HWeGcY/Edl2wQKAImg\nsPCLYK/iH8AI4GygM1BjvpXK99fH0QmFu0hj1RIAOcB7wMd4p5dNB7ZVbZAAASARzJ/vfeOqZiKQ\nCnwAPAlkA7uqN6qo8J7vE4W7SGPVEgC9gLNCv1voZ1/1Rj4HgNShtNQ7eFptKOYzIB+YC5wDDABu\nAlZXf75zsH49+HTBpsJdpDFqCYBKE4B0oCfeEM3g6g18DgCpQ15e2MVv450T3r3Ksr6E6bmDd4C1\nlu3EW8Tz3EWkDhH+cFcAS4EtwB/5oid/hsoAmDLljMXOuYg/frRrLq/5jf/8T7qEmfXzUyCj2rIM\n4JNw721FBezcGW5N3CncRRpjx46aZ8VU0wLvq/uvgFzg/1VvUFHB6p/8hB/ce2/YOWrMrM6faNrE\nul1zeM2v7NtHlzDv5zl4x1Gq+hg4t7YPwOHDta2JK4W7SGOUl0fd9ARhxtxDvj9kCP+ydm2N8BEf\njR0LTz5ZY3F3vPfyHb6YQOsveMdYwmrTJh7VRaQxd5HGyKj+Bd1Tinfjg0/x5uPYCPwGuK6WzaR8\n+cu0aNGClJQUBXui6NMHWrWqsfhsYCQwA+/g6mZgLXBLuG2kpUHv3nEssnYKd5HGqCUADG8IpgPQ\nBpgMPETNu9wAvgaA1GHcuFpXrQAqgPZ4c5znUkvP3bk6txNPCneRxqjlD7cd8CrwEd547E6825WF\n5WMASB3at/emigjzTerLeFPhfgb8N/Av4Z5vBoMH+zaXkMJdpDHqCICo+BwAEkFOjvfNqiHS0rzn\n+0ThLtJYSRwAEkG/ft
4kYOnp9Xteerr3vMyI83vFjcJdpLGSOAAkCtnZX7y/kb6hmX3xvsZpVsho\nKdxFYiE7m/cmTeII4JIoACRK2dnw6qswYoR3AL36N7W0NG/5iBFeuwR4X3Weu0iM3LF9O/86ZQqj\n9u3zphQwO3POmcr53AcP9oZi1GNPLpmZkJ/vTRWRl+ddeXr4sHcee+/e3kHxBDp2onAXiYFNmzax\nd+9evrtuHaSmJk0ASAO0a1djqohEpHAXaSTnHNOmTWPmzJmkpqZ6C5MkACS4NOYu0kgvvvgipaWl\njB071u9SRE5TuIs0gnOO6dOnM2fOHFq21BdhSRwKd5FGWLNmDcePH+fmm2/2uxSRM6irIdJAJ0+e\n5P7772fBggWkpKifJIlFn0iRBnrqqac499xzGTJkiN+liNSgnrtIAxw/fpyZM2eycuVKTdErCUk9\nd5EGyMvLo3Pnzlx3XW0ztIv4Sz13kXo6evQoc+fO5ZlnnvG7FJFaRdVzN7MbzWyvmRWb2X11tLvZ\nzJyZ6bpqCaxHH32Uvn37cuWVV/pdikitIvbczawFsBz4NlACFJrZOufc7mrtzsW79+9r8ShUJBF8\n+umnzJ8/n40bN/pdikidoum59weKnXP7nXOf490aMtzdwuYCDwJ13wpeJIk9/PDDXHvttfTt29fv\nUkTqFM2Y+8XAgSqPS4BvVG1gZlcAHZ1zvzOzyTGsTyRhfPTRR/z85z/nT3/6k9+liEQUTc893Hle\n7vRKsxTg58A9ETdkNt7MisysqKysLPoqRRLAokWLGDZsGD169PC7FJGIoum5lwAdqzzuABys8vhc\n4KvAH0Pn+14ArDOzm5xzRVU35JxbCawEyMzMdIgkidLSUnJzc9m2bZvfpYhEJZqeeyHQzcy6mlkq\nMBpYV7nSOVfunGvrnOvinOsCbAVqBLtIMluwYAFjxoyhS5cufpciEpWIPXfn3AkzuxPYCLQAfumc\n22Vmc4Ai59y6urcgktxKSkrIy8tj165dfpciErWoLmJyzq0H1ldbNqOWttc2viyRxDFv3jx+9KMf\nceGFF/pdikjUdIWqSB3279/Pb3/7W95++22/SxGpF80tI1KHWbNmMWnSJM4//3y/SxGpF/XcRWqx\ne/duXnjhBYqLi/0uRaTe1HMXqcWMGTOYMmUKrVu39rsUkXpTz10kjG3btrFlyxaeeOIJv0sRaRD1\n3EXCmD59OlOnTiU9Pd3vUkQaRD13kWr+9Kc/sWfPHtauXet3KSINpp67SBXOOaZNm8bMmTNJTU31\nuxyRBlO4i1Tx0ksv8cEHH3DLLbf4XYpIoyjcRUIqe+2zZ8+mZUuNWEpyU7iLhKxdu5bjx4/zve99\nz+9SRBpN3RMR4OTJk9x///389Kc/JSVFfR5JfvoUiwBPP/00Z599NkOHDvW7FJGYUM9dmr3jx48z\nc+ZMHnnkEUI3nBFJeuq5S7O3atUqOnbsyMCBA/0uRSRm1HOXZu3YsWPMmTOHp59+2u9SRGJKPXdp\n1h599FH69u3LVVdd5XcpIjGlnrs0W5999hnz589nw4YNfpciEnPquUuz9fDDD3PNNddw+eWX+12K\nSMyp5y7N0kcffcSSJUvYtGmT36WIxIV67tIsLV68mKFDh9KzZ0+/SxGJC/XcpdkpKytjxYoVbNu2\nze9SROJGPXdpdhYsWMDo0aPp0qWL36WIxE3we+6lpZCXBzt2QHk5ZGRAnz7wwx9Cu3Z+VydN7P33\n3+fxxx/nrbfe8rsUkbgy55wvL5yZmemKiori9wKFhTB/PlSe5nb06Bfr0tLAORg0CHJyoF+/+NUh\nCSU7O5tzzjmHhQsX+l2KSIOY2TbnXGakdsHsuefmwuTJUFHhhXh1FRXef9esgY0bYdEiyM5u2hql\nye3fv59nnnmGvXv3+l2KSNwFL9wrg/3IkchtnfPaTZ7sPVbAB9rs2bOZNGkSbdu29bsU
kbiL6oCq\nmd1oZnvNrNjM7guz/m4z221mO8zsFTPrHPtSo1BYWCPYjwG3AZ2Bc4ErgBrXI1YGfDyHicRXe/bs\nYcOGDdx9991+lyLSJCKGu5m1AJYDg4DLgDFmdlm1Zm8Cmc65PsCzwIOxLjQq8+d/MeQScgLoCLwK\nlANzgVHAe9WfW1HhPV8CacaMGUyePJnWrVv7XYpIk4im594fKHbO7XfOfQ48BQyv2sA59wfnXGV3\neSvQIbZlRqG01Dt4Wm2M/WxgFtAFb2eHAl2BGmc4Owfr10NZWdxLlab1xhtvsHnzZu68806/SxFp\nMtGE+8XAgSqPS0LLanMbYUY+4i4vL6pmHwBvA73CrTSLejuSPKZPn87UqVNJT0/3uxSRJhPNAdVw\nt6YJe/6kmY0FMoFv1rJ+PDAeoFOnTlGWGKUdO8483TGM48D3gR8AYS86r6iAnTtjW5f4avPmzeze\nvZvnnnvO71JEmlQ0PfcSvGHrSh2Ag9Ubmdn1wDTgJufcsXAbcs6tdM5lOucy28X6AqLy8jpXnwJu\nAVKBZXU1PHw4djWJr5xzTJs2jRkzZnDWWWf5XY5Ik4om3AuBbmbW1cxSgdHAuqoNzOwK4FG8YC+N\nfZlRyMiodZXDGyv6AMgHvlTXdtq0iWlZ4p+XX36ZQ4cOceutt/pdikiTixjuzrkTwJ3ARmAP8Ixz\nbpeZzTGzm0LNFgLnAL81s+1mtq6WzcVPnz7QqlXYVdl4hf8nkFbHJo61aMGbJ0/y8ccfx6FAaUqV\nvfbZs2fTsmXwLucQiSSq89ydc+udc92dc//knHsgtGyGc25d6PfrnXP/xzl3eejnprq3GAfjxoVd\n/De8rxTbgQvw/gU6B3gyTNsWKSksLC2lQ4cODB06lMcff5wPP/wwTgVLPK1bt45jx44xatQov0sR\n8UVwZoVs396bK8bOPP7bGW9Y5ijwaZWf71d/vhkthw3j1y+9RElJCd///vd5/vnnueSSS7j++utZ\nsWIFhw4daoIdkcY6deoU999/P3PnziUlJTgfcZH6CNYnPyfHmxSsIdLSvOcDrVu3ZsyYMTz77LMc\nOnSIiRMnsmXLFnr16sWAAQNYsmQJ7733Xuzqlph6+umnSU9PZ9iwYX6XIuKbYIV7v37eJGD1PZ85\nPd17XmbNidbS09MZMWIEq1ev5n/+53+YNm0ae/bsoX///nz961/npz/9KX/9619jtAPSWCdOnGDm\nzJk88MADmIU7i1ekeQhWuIM3+VdlwEf64zb7ItijmDQsNTWVQYMG8dhjj3Hw4EEWL17MoUOHGDhw\nIL169WLGjBls374dv6ZRFli1ahUdOnRg4MCBfpci4qvgzudeVOTNFbN+vRfiVeecqZzPffBgbygm\nTI+9Pk6dOsXrr79Ofn4++fn5mBlZWVmMHDmS/v37a9y3iRw7dozu3bvz1FNPcdVVV/ldjkhcRDuf\ne3DDvVJZmTelwM6d3gVKbdpA797e2TVxuBOTc46//OUv5OfnU1BQQHl5OSNGjCArK4urr76aFi1a\nxPw1xbN06VI2btzI7373O79LEYkbhXuC+Otf/0pBQQH5+fkcOHCA4cOHk5WVxXXXXUdqaqrf5QXG\nZ599xle+8hU2bNjA5Zdf7nc5InETbbhrvCDOevbsydSpU9m2bRuvv/46l156KXPnzuWCCy7glltu\n4bnnnuNINDcWkTotXbqUq6++WsEuEqKeu08OHjzImjVrKCgooKioiG9/+9uMHDmSIUOGaM7xevro\no4/o1q0bmzZtomfPsFPCiQSGeu4J7qKLLmLChAm8/PLLFBcXM2jQIJ588kldHdsAS5YsYciQIQp2\nkSrUc08w5eXlrF+/nvz8fF566SX69etHVlYW3/3ud7nwwgv9Li/hlJWV0bNnT4qKiujatavf5YjE\nnQ6oBsCRI0fYuHEj+fn5PP/88/Tq1YusrCxGjBhB
ly5d/C4vIUyePJkjR46wYsUKv0sRaRIK94D5\n/PPPeeWVVygoKGDt2rV06tSJkSNHkpWVRY8ePfwuzxcHDx6kd+/e7Ny5k4suusjvckSahMI9wE6c\nOMGmTZsoKCigoKCA8847j6ysLLKysujTp0+zuex+woQJnH322SxcuNDvUkSajMK9mTh16hSvvfba\n6XPpU1JSGDlyZOCvjn333XfJzMxk7969tG3b1u9yRJqMwr0Zqnp1bH5+Ph9//PHpoA/a1bHjxo2j\nc+fOzJ492+9SRJqUwl3Ys2fP6aGbIF0du2fPHr75zW/yzjvvkFHH7RVFgkjhLmd49913Twf9nj17\nGDJkCFlZWXznO98hraFz4Ptk1KhRfP3rX+fee+/1uxSRJqdwl1pVXh2bn59PUVERN9xwA1lZWQwe\nPDjhr4598803GTJkCO+88w5nn3223+WINDldoSq1qrw69pVXXmHfvn0MGjSI1atX06FDB4YNG5bQ\nV8dOnz6dnJwcBbtIBOq5y2nl5eU8//zz5Ofn8/LLL9O/f39GjhzJiBEjuOCCC/wujz//+c+MGTOG\nt99+m7POOsvvckR8oWEZaZQjR47wwgsvUFBQcMbVsSNHjqRz585NXo9zjuuuu46xY8dy2223Nfnr\niyQKhbvEzLFjx/j9739Pfn4+a9eupXPnzqeDPuZXx5aWejdX2bEDysshIwP69OHVSy7h9qlT2b17\nNy1btozta4okEYW7xEXl1bH5+fk899xztGnT5vQ0CI26Oraw0Lst4oYN3uOjR0+vcmlpfH70KKWZ\nmXRcvty7EbpIM6Vwl7irvDq28qKpFi1anA76fv36RX91bG4uTJ7s3ee2js+jM8PS0qK+oblIECnc\npUk559i+ffvpaRA++eST0/eOHTBgQO1Xx1YGe33uRpWeroCXZkvhLr6qvDo2Pz+f999/n+HDhzNy\n5Mgzr44tLIRrr6012N8BegM3A7+qvjI9HV59FTIjfsZFAiWm57mb2Y1mttfMis3svjDrzzKzp0Pr\nXzOzLvUvWYLk0ksvZdq0abzxxhts3bqVHj16nL537K233sqaNWs4OW+eNxRTi4lAraPrFRXeGL2I\nhBUx3M2sBbAcGARcBowxs8uqNbsNOOyc+wrwc+BnsS5UklfXrl2555572Lx5M2+99Rbf+MY3WL14\nMcfXrat1jP0p4DxgYG0bdQ7Wr4eysjhVLZLcoum59weKnXP7nXOf4/3dDa/WZjiwKvT7s8BAay6T\niku9XHTRRUycOJH8YcM4q1WrsG0+BmYAiyNtzMw7bVJEaogm3C8GDlR5XBJaFraNc+4EUA6cH4sC\nJaB27MCqnO5Y1f14XwU7RtpGRQXs3BnjwkSCIZqrQcL1wKt/l46mDWY2HhgP0KlTpyheWgKrvDzs\n4u3Ay8Cb0W7n8OEYFSQSLNGEewlndqI6AAdraVNiZi2BDOAf1TfknFsJrATvbJmGFCwBUcs87H8E\n3gMq/+n/FDgJ7AbeCPeENm1iXppIEEQzLFMIdDOzrmaWCowG1lVrsw74Qej3m4HfO7/OsZTk0KcP\nhBlzHw/sw+vBbwfuAIYAG8NtIy0NeveOY5EiyStiuIfG0O/E+/vaAzzjnNtlZnPM7KZQs/8PnG9m\nxcDdQI3TJUXOMG5c2MXpwAVVfs4BWgHtwjV2rtbtiDR3Uc3A5JxbD6yvtmxGld+PAt+LbWkSaO3b\nw6BBsGZNnVMOzKpthRkMHgztwsa+SLOnm3WIf3JyvKGVhkhL854vImEp3MU//fp5c8Skp9fveZVz\ny2jqAZFaaWJs8Vfl5F9RzAqJmddj16RhIhGp5y7+y872JgEbMcI7g6b6UE1amrd8xAivnYJdJCL1\n3CUxZGZCfr43V0xennfl6eHD3nnsvXt7Z8Xo4KlI1BTukljatYMpU/yuQiTpaVhGRCSAFO4iIgGk\ncBcRCSCFu4hI
ACncRUQCSOEuIhJACncRkQBSuIuIBJD5dU8NMysD/tbEL9sW+HsTv6YftJ/B0Rz2\nEbSf9dHZORfxcm3fwt0PZlbknAv8VILaz+BoDvsI2s940LCMiEgAKdxFRAKouYX7Sr8LaCLaz+Bo\nDvsI2s+Ya1Zj7iIizUVz67mLiDQLgQx3M7vRzPaaWbGZ3Rdm/Vlm9nRo/Wtm1qXpq2y8KPbzbjPb\nbWY7zOwVM+vsR52NEWkfq7S72cycmSXlGRfR7KeZjQq9n7vM7NdNXWMsRPGZ7WRmfzCzN0Of28F+\n1NkYZvYUS70MAAADKUlEQVRLMys1s7dqWW9m9nDo/8EOM/taXApxzgXqB2gB7AMuAVKBvwCXVWsz\nAXgk9Pto4Gm/647Tfn4LSA/9np1s+xnNPobanQv8F7AVyPS77ji9l92AN4E2ocft/a47Tvu5EsgO\n/X4Z8J7fdTdgP68Bvga8Vcv6wcAGwIArgdfiUUcQe+79gWLn3H7n3OfAU8Dwam2GA6tCvz8LDDQz\na8IaYyHifjrn/uCcOxJ6uBXo0MQ1NlY07yXAXOBB4GhTFhdD0ezn7cBy59xhAOdcaRPXGAvR7KcD\nWod+zwAONmF9MeGc+y/gH3U0GQ484TxbgfPM7MJY1xHEcL8YOFDlcUloWdg2zrkTQDlwfpNUFzvR\n7GdVt+H1FpJJxH00syuAjs653zVlYTEWzXvZHehuZpvNbKuZ3dhk1cVONPs5CxhrZiXAemBS05TW\npOr7t9sgQbyHargeePVTgqJpk+ii3gczGwtkAt+Ma0WxV+c+mlkK8HNgXFMVFCfRvJct8YZmrsX7\nBrbJzL7qnPsozrXFUjT7OQbIc84tNrOrgNWh/TwV//KaTJPkTxB77iVAxyqPO1Dzq93pNmbWEu/r\nX11foxJRNPuJmV0PTANucs4da6LaYiXSPp4LfBX4o5m9hzd+uS4JD6pG+5ld65w77px7F9iLF/bJ\nJJr9vA14BsA5twVohTcfS5BE9bfbWEEM90Kgm5l1NbNUvAOm66q1WQf8IPT7zcDvXehIRxKJuJ+h\nIYtH8YI9Gcdo69xH51y5c66tc66Lc64L3nGFm5xzRf6U22DRfGbX4B0gx8za4g3T7G/SKhsvmv38\nb2AggJldihfuZU1aZfytA24NnTVzJVDunDsU81fx+8hynI5WDwbexjsyPy20bA7eHz54H5jfAsXA\n68Alftccp/18GfgA2B76Wed3zbHex2pt/0gSni0T5XtpwBJgN7ATGO13zXHaz8uAzXhn0mwHbvC7\n5gbs42+AQ8BxvF76bcAdwB1V3svlof8HO+P1mdUVqiIiARTEYRkRkWZP4S4iEkAKdxGRAFK4i4gE\nkMJdRCSAFO4iIgGkcBcRCSCFu4hIAP0v8ncXPBCruC4AAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 1, 4]\n" + ] + } + ], + "source": [ + "import networkx as nx\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")\n", + "nx.draw_networkx(dataset[12])\n", + "plt.show()\n", + "print(list(dataset[12][3]))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + 
"autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "cf = BasicCostFunction(1,3,1,3)\n", + "N=len(dataset)\n", + "N=10\n", + "ged_distances = np.zeros((N,N))\n", + "for i in range(0,N):\n", + " for j in range(i,N):\n", + " ged_distances[j,i] = ged_distances[i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n", + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n", + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n" + ] + } + ], + "source": [ + "G1=dataset[12]\n", + "G2=dataset[12]\n", + "\n", + "print(ged(G1,G2))\n", + "print(ged(G1,G2,method='Basic'))\n", + "print(ged(G1,G2,method='Riesen'))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0: {0: [0], 3: [0, 3], 1: [0, 3, 1], 4: [0, 3, 4], 2: [0, 3, 4, 2]}, 1: {1: [1], 3: [1, 3], 0: [1, 3, 0], 4: [1, 3, 4], 2: [1, 3, 4, 2]}, 2: {2: [2], 4: [2, 4], 3: [2, 4, 3], 0: [2, 4, 3, 0], 1: [2, 4, 3, 1]}, 3: {3: [3], 0: [3, 0], 1: [3, 1], 4: [3, 4], 2: [3, 4, 2]}, 4: {4: [4], 2: [4, 2], 3: [4, 3], 0: [4, 3, 0], 1: [4, 3, 1]}}\n", + "[[ 0. 2. 3. 1. 2.]\n", + " [ 2. 0. 3. 1. 2.]\n", + " [ 3. 3. 0. 2. 1.]\n", + " [ 1. 1. 2. 0. 1.]\n", + " [ 2. 2. 1. 1. 
0.]]\n" + ] + } + ], + "source": [ + "l = nx.shortest_path(G1)\n", + "print(l)\n", + "\n", + "distances = np.zeros((G1.number_of_nodes(),G1.number_of_nodes()))\n", + "for i in l.keys():\n", + " for j in l[i].keys():\n", + " distances[i,j] = len(l[i][j])-1\n", + "\n", + "print(distances)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + }, + "name": "test_lib.ipynb" + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/py-graph_test.ipynb b/notebooks/py-graph_test.ipynb new file mode 100644 index 0000000..96e0d0d --- /dev/null +++ b/notebooks/py-graph_test.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import paths\n", + "\n", + "import pygraph\n", + "\n", + "from pygraph.utils.graphfiles import loadDataset\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import networkx as nx\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# We load a ds dataset\n", + "# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n", + "dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n", + "100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n", + "100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n", + "100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n" + ] + } + ], + "source": [ + "#Compute graph edit distances\n", + "\n", + "from tqdm import tqdm\n", + "from pygraph.c_ext.lsape_binders import lsap_solverHG\n", + "from pygraph.ged.costfunctions import ConstantCostFunction\n", + "from pygraph.ged.GED import ged\n", + "import time\n", + "\n", + "cf = ConstantCostFunction(1,3,1,3)\n", + "N=len(dataset)\n", + "\n", + "methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n", + "ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n", + "\n", + "times = list()\n", + "start = time.clock()\n", + "for i in tqdm(range(0,N)):\n", + " for j in range(0,N):\n", + " ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", + "times.append(time.clock() - start)\n", + "\n", + "\n", + "start = time.clock()\n", + "for i in tqdm(range(0,N)):\n", + " for j in range(0,N):\n", + " ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n", + "\n", + "times.append(time.clock() - start)\n", + "\n", + "start = time.clock()\n", + "for i in tqdm(range(0,N)):\n", + " for j in range(0,N):\n", + " ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n", + "times.append(time.clock() - start)\n", + "\n", + "start = time.clock()\n", + "for i in tqdm(range(0,N)):\n", + " for j in range(0,N):\n", + " ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n", + "times.append(time.clock() - start)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + " method \t mean \t mean \t time\n", + " Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n", + " Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n", + " Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n", + " Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n" + ] + } + ], + "source": [ + "print(\" method \\t mean \\t mean \\t time\")\n", + "data = list()\n", + "for i in range(0,len(ged_distances)):\n", + " ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n", + " print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + }, + "name": "py-graph_test.ipynb" + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pygraph/__init__.py b/pygraph/__init__.py new file mode 100644 index 0000000..3eafadb --- /dev/null +++ b/pygraph/__init__.py @@ -0,0 +1,21 @@ +# -*-coding:utf-8 -*- +""" +Pygraph + +This package contains 4 sub packages : + * c_ext : binders to C++ code + * ged : allows to compute graph edit distance between networkX graphs + * kernels : computation of graph kernels, ie graph similarity measure compatible with SVM + * notebooks : examples of code using this library + * utils : Diverse computation on graphs +""" + +# info +__version__ = "0.1" +__author__ = "Benoit Gaüzère" +__date__ = "November 2017" + +# import sub modules +from pygraph import 
c_ext +from pygraph import ged +from pygraph import utils diff --git a/pygraph/c_ext/Makefile b/pygraph/c_ext/Makefile new file mode 100644 index 0000000..71c3eb6 --- /dev/null +++ b/pygraph/c_ext/Makefile @@ -0,0 +1,5 @@ +# You must specify your env variable LSAPE_DIR +#LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/ + +liblsap.so:lsap.cpp + g++ -fPIC -I/home/bgauzere/Téléchargements/lsape/include/ -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR) diff --git a/pygraph/c_ext/README.md b/pygraph/c_ext/README.md new file mode 100644 index 0000000..f1aa549 --- /dev/null +++ b/pygraph/c_ext/README.md @@ -0,0 +1,6 @@ +Python wrapper for lsape method + +Specify your LSAPE_DIR env variable with the location of the source +code to compile + +source code : https://bougleux.users.greyc.fr/lsape/ diff --git a/pygraph/c_ext/__init__.py b/pygraph/c_ext/__init__.py new file mode 100644 index 0000000..7101e7e --- /dev/null +++ b/pygraph/c_ext/__init__.py @@ -0,0 +1,17 @@ +# -*-coding:utf-8 -*- +"""Pygraph - c_ext module + +This package binds some C++ code to python + +lsape_binders.py : binders to C++ code of LSAPE methods implemented in +https://bougleux.users.greyc.fr/lsape/ + +""" + +# info +__version__ = "0.1" +__author__ = "Benoit Gaüzère" +__date__ = "November 2017" + +# import sub modules +from pygraph.c_ext import lsape_binders diff --git a/pygraph/c_ext/lsap.cpp b/pygraph/c_ext/lsap.cpp new file mode 100644 index 0000000..712a092 --- /dev/null +++ b/pygraph/c_ext/lsap.cpp @@ -0,0 +1,43 @@ +/* +Python wrapper +*/ + +#include "hungarian-lsape.hh" +#include "hungarian-lsap.hh" + +#include + +extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){ + double * u = new double[nm]; + double * v = new double[nm]; + + int * rho_int = new int[nm]; + int * varrho_int = new int[nm]; + + hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int); + //Find a better way to do + for (int i =0;i= m): ged += cf.cnd(i, G1) else: ged += cf.cns(i, phi_i, G1, G2) - for j in 
G2.nodes_iter(): + for j in G2.nodes(): phi_j = varrho[j] if(phi_j >= n): ged += cf.cni(j, G2) - for e in G1.edges_iter(data=True): + for e in G1.edges(data=True): i = e[0] j = e[1] phi_i = rho[i] @@ -49,13 +51,13 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None, if(mappedEdge): e2 = [phi_i, phi_j, G2[phi_i][phi_j]] min_cost = min(cf.ces(e, e2, G1, G2), - cf.ced(e, G1), cf.cei(e2, G2)) + cf.ced(e, G1) + cf.cei(e2, G2)) ged += min_cost else: ged += cf.ced(e, G1) else: ged += cf.ced(e, G1) - for e in G2.edges_iter(data=True): + for e in G2.edges(data=True): i = e[0] j = e[1] phi_i = varrho[i] @@ -68,7 +70,3 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None, else: ged += cf.ced(e, G2) return ged, rho, varrho - - -def computeDistanceMatrix(dataset): - pass diff --git a/pygraph/ged/__init__.py b/pygraph/ged/__init__.py new file mode 100644 index 0000000..7f5b1bc --- /dev/null +++ b/pygraph/ged/__init__.py @@ -0,0 +1,17 @@ +# -*-coding:utf-8 -*- +"""Pygraph - ged module + +Implement some methods to compute ged between graphs + + +""" + +# info +__version__ = "0.1" +__author__ = "Benoit Gaüzère" +__date__ = "November 2017" + +from pygraph.ged import costfunctions +from pygraph.ged import bipartiteGED +from pygraph.ged import GED + diff --git a/ged/bipartiteGED.py b/pygraph/ged/bipartiteGED.py similarity index 62% rename from ged/bipartiteGED.py rename to pygraph/ged/bipartiteGED.py index 2599eb0..b997f9e 100644 --- a/ged/bipartiteGED.py +++ b/pygraph/ged/bipartiteGED.py @@ -1,9 +1,9 @@ import numpy as np from scipy.optimize import linear_sum_assignment -from ged.costfunctions import BasicCostFunction +from pygraph.ged.costfunctions import ConstantCostFunction -def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)): +def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)): """Compute a Cost Matrix according to cost function cf""" n = G1.number_of_nodes() m = G2.number_of_nodes() @@ -11,23 +11,23 @@ def 
computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)): C = np.ones([nm, nm])*np.inf C[n:, m:] = 0 - for u in G1.nodes_iter(): - for v in G2.nodes_iter(): + for u in G1.nodes(): + for v in G2.nodes(): cost = cf.cns(u, v, G1, G2) C[u, v] = cost - for v in G1.nodes_iter(): + for v in G1.nodes(): C[v, m + v] = cf.cnd(v, G1) - for v in G2.nodes_iter(): + for v in G2.nodes(): C[n + v, v] = cf.cni(v, G2) return C -def getOptimalMapping(C): +def getOptimalMapping(C, lsap_solver=linear_sum_assignment): """Compute an optimal linear mapping according to cost Matrix C inclure les progs C de Seb """ - row_ind, col_ind = linear_sum_assignment(C) + row_ind, col_ind = lsap_solver(C) return col_ind, row_ind[np.argsort(col_ind)] diff --git a/ged/costfunctions.py b/pygraph/ged/costfunctions.py similarity index 52% rename from ged/costfunctions.py rename to pygraph/ged/costfunctions.py index 4d2be90..28318de 100644 --- a/ged/costfunctions.py +++ b/pygraph/ged/costfunctions.py @@ -2,15 +2,17 @@ import numpy as np from scipy.optimize import linear_sum_assignment -class BasicCostFunction: +class ConstantCostFunction: + """ Define a symmetric constant cost fonction for edit operations """ def __init__(self, cns, cni, ces, cei): self.cns_ = cns self.cni_ = self.cnd_ = cni self.ces_ = ces self.cei_ = self.ced_ = cei - def cns(self, u, v, G1, G2): - return (G1.node[u]['label'] != G2.node[v]['label'])*self.cns_ + def cns(self, node_u, node_v, g1, g2): + """ return substitution edit operation cost between node_u of G1 and node_v of G2""" + return (g1.node[node_u]['label'] != g2.node[node_v]['label'])*self.cns_ def cnd(self, u, G1): return self.cnd_ @@ -30,9 +32,11 @@ class BasicCostFunction: return self.cei_ -class RiesenCostFunction(BasicCostFunction): - def __init__(self, cf): - BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_) +class RiesenCostFunction(): + """ Cost function associated to the computation of a cost matrix between nodes for LSAP""" + def 
__init__(self, cf, lsap_solver=linear_sum_assignment): + self.cf_ = cf + self.lsap_solver_ = lsap_solver def cns(self, u, v, G1, G2): """ u et v sont des id de noeuds """ @@ -48,41 +52,43 @@ class RiesenCostFunction(BasicCostFunction): e1 = [u, nbr_u, G1[u][nbr_u]] for nbr_v in G2[v]: e2 = [v, nbr_v, G2[v][nbr_v]] - sub_C[i, j] = self.ces(e1, e2, G1, G2) + sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2) j += 1 i += 1 i = 0 for nbr_u in l_nbr_u: - sub_C[i, m+i] = self.ced([u, nbr_u, G1[u][nbr_u]], G1) + sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1) i += 1 j = 0 for nbr_v in l_nbr_v: - sub_C[n+j, j] = self.cei([v, nbr_v, G2[v][nbr_v]], G2) + sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2) j += 1 - row_ind, col_ind = linear_sum_assignment(sub_C) + row_ind, col_ind = self.lsap_solver_(sub_C) cost = np.sum(sub_C[row_ind, col_ind]) - return BasicCostFunction.cns(self, u, v, G1, G2) + cost + return self.cf_.cns(u, v, G1, G2) + cost def cnd(self, u, G1): cost = 0 for nbr in G1[u]: - cost += BasicCostFunction.ced(self,[u,nbr,G1[u][nbr]],G1) + cost += self.cf_.ced([u,nbr,G1[u][nbr]],G1) - return BasicCostFunction.cnd(self,u,G1) + cost + return self.cf_.cnd(u,G1) + cost def cni(self, v, G2): cost = 0 for nbr in G2[v]: - cost += BasicCostFunction.cei(self, [v,nbr,G2[v][nbr]], G2) + cost += self.cf_.cei([v,nbr,G2[v][nbr]], G2) - return BasicCostFunction.cni(self, v, G2) + cost + return self.cf_.cni(v, G2) + cost -class NeighboorhoodCostFunction(BasicCostFunction): - def __init__(self, cf): - BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_) +class NeighboorhoodCostFunction(): + """ Cost function associated to the computation of a cost matrix between nodes for LSAP""" + def __init__(self, cf, lsap_solver=linear_sum_assignment): + self.cf_ = cf + self.lsap_solver_ = lsap_solver def cns(self, u, v, G1, G2): """ u et v sont des id de noeuds """ @@ -98,36 +104,35 @@ class NeighboorhoodCostFunction(BasicCostFunction): e1 = [u, nbr_u, 
G1[u][nbr_u]] for nbr_v in G2[v]: e2 = [v, nbr_v, G2[v][nbr_v]] - sub_C[i, j] = self.ces(e1, e2, G1, G2) - sub_C[i, j] += BasicCostFunction.cns(self, - nbr_u, nbr_v, G1, G2) + sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2) + sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2) j += 1 i += 1 i = 0 for nbr_u in l_nbr_u: - sub_C[i, m+i] = self.ced([u, nbr_u, G1[u][nbr_u]], G1) - sub_C[i, m+i] += BasicCostFunction.cnd(self, nbr_u, G1) + sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1) + sub_C[i, m+i] += self.cf_.cnd(nbr_u, G1) i += 1 j = 0 for nbr_v in l_nbr_v: - sub_C[n+j, j] = self.cei([v, nbr_v, G2[v][nbr_v]], G2) - sub_C[n+j, j] += BasicCostFunction.cni(self, nbr_v, G2) + sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2) + sub_C[n+j, j] += self.cf_.cni(nbr_v, G2) j += 1 - row_ind, col_ind = linear_sum_assignment(sub_C) + row_ind, col_ind = self.lsap_solver_(sub_C) cost = np.sum(sub_C[row_ind, col_ind]) - return BasicCostFunction.cns(self, u, v, G1, G2) + cost + return self.cf_.cns(u, v, G1, G2) + cost def cnd(self, u, G1): cost = 0 for nbr in G1[u]: - cost += BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1) - return BasicCostFunction.cnd(self, u, G1) + cost + cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1) + return self.cf_.cnd(u, G1) + cost def cni(self, v, G2): cost = 0 for nbr in G2[v]: - cost += BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2) - return BasicCostFunction.cni(self, v, G2) + cost + cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2) + return self.cf_.cni(v, G2) + cost diff --git a/kernels/.gitignore b/pygraph/kernels/.gitignore similarity index 100% rename from kernels/.gitignore rename to pygraph/kernels/.gitignore diff --git a/pygraph/kernels/spkernel.py b/pygraph/kernels/spkernel.py new file mode 100644 index 0000000..2b1823a --- /dev/null +++ b/pygraph/kernels/spkernel.py @@ -0,0 +1,68 @@ +import sys +import pathlib +sys.path.insert(0, "../") + + +import networkx as nx +import numpy as np +import time + +from utils.utils import 
getSPGraph + + +def spkernel(*args): + """Calculate shortest-path kernels between graphs. + + Parameters + ---------- + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + / + G1, G2 : NetworkX graphs + 2 graphs between which the kernel is calculated. + + Return + ------ + Kmatrix/Kernel : Numpy matrix/int + Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP Kernel between 2 graphs. + + References + ---------- + [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. + """ + if len(args) == 1: # for a list of graphs + Gn = args[0] + + Kmatrix = np.zeros((len(Gn), len(Gn))) + + Sn = [] # get shortest path graphs of Gn + for i in range(0, len(Gn)): + Sn.append(getSPGraph(Gn[i])) + + start_time = time.time() + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Sn[i].edges(data = True): + for e2 in Sn[j].edges(data = True): + if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] += (0 if i == j else 1) + + print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time))) + + return Kmatrix + + else: # for only 2 graphs + G1 = args[0] + G2 = args[1] + + kernel = 0 + + for e1 in G1.edges(data = True): + for e2 in G2.edges(data = True): + if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + kernel += 1 + + print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time)) + + return kernel \ No newline at end of file diff --git a/pygraph/utils/__init__.py b/pygraph/utils/__init__.py new file mode 100644 index 0000000..501d1aa --- /dev/null +++ b/pygraph/utils/__init__.py @@ -0,0 +1,17 @@ +# -*-coding:utf-8 -*- +"""Pygraph - 
utils module + +Implements some methods to manage graphs + graphfiles.py : load .gxl and .ct files + utils.py : compute some properties on NetworkX graphs + + +""" + +# info +__version__ = "0.1" +__author__ = "Benoit Gaüzère" +__date__ = "November 2017" + +from pygraph.utils import graphfiles +from pygraph.utils import utils diff --git a/utils/graphfiles.py b/pygraph/utils/graphfiles.py similarity index 65% rename from utils/graphfiles.py rename to pygraph/utils/graphfiles.py index b561f38..c0ab9a3 100644 --- a/utils/graphfiles.py +++ b/pygraph/utils/graphfiles.py @@ -1,13 +1,25 @@ import networkx as nx - - + def loadCT(filename): + """Load data from a .ct file. + + Notes + ------ + A typical example of data in a .ct file looks like this: + + 3 2 <- number of nodes and edges + 0.0000 0.0000 0.0000 C <- each line describes a node; the last field is the label of the node, representing a chemical element @Q what are the first 3 numbers? + 0.0000 0.0000 0.0000 C + 0.0000 0.0000 0.0000 O + 1 3 1 1 <- each line describes an edge; the first two numbers are the two nodes of the edge, the last number is the label. @Q what is the 3rd number? 
+ 2 3 1 1 + """ content = open(filename).read().splitlines() - G = nx.Graph(name=str(content[0])) + G = nx.Graph(name=str(content[0])) # set name of the graph tmp = content[1].split(" ") if tmp[0] == '': - nb_nodes = int(tmp[1]) - nb_edges = int(tmp[2]) + nb_nodes = int(tmp[1]) # number of the nodes + nb_edges = int(tmp[2]) # number of the edges else: nb_nodes = int(tmp[0]) nb_edges = int(tmp[1]) @@ -18,7 +30,7 @@ def loadCT(filename): G.add_node(i, label=tmp[3]) for i in range(0, nb_edges): - tmp = content[i+G.number_of_nodes()+2].split(" ") + tmp = content[i + G.number_of_nodes() + 2].split(" ") tmp = [x for x in tmp if x != ''] G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3])) return G @@ -43,9 +55,10 @@ def loadGXL(filename): label = edge.find('attr')[0].text G.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], label=label) return G - - + def loadDataset(filename): + """load file list of the dataset. + """ from os.path import dirname, splitext dirname_dataset = dirname(filename) @@ -56,7 +69,7 @@ def loadDataset(filename): content = open(filename).read().splitlines() for i in range(0, len(content)): tmp = content[i].split(' ') - data.append(loadCT(dirname_dataset + '/' + tmp[0])) + data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the first '#' in the file name y.append(float(tmp[1])) elif(extension == "cxl"): import xml.etree.ElementTree as ET diff --git a/pygraph/utils/utils.py b/pygraph/utils/utils.py new file mode 100644 index 0000000..52a85f1 --- /dev/null +++ b/pygraph/utils/utils.py @@ -0,0 +1,59 @@ +import networkx as nx +import numpy as np + + +def getSPLengths(G1): + sp = nx.shortest_path(G1) + distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes())) + for i in sp.keys(): # iterate the shortest-path dict (was np.keys(), which raises AttributeError on the numpy module) + for j in sp[i].keys(): + distances[i, j] = len(sp[i][j])-1 + return distances + +def getSPGraph(G): + """Transform graph G to its corresponding shortest-paths graph. 
+ + Parameters + ---------- + G : NetworkX graph + The graph to be transformed. + + Return + ------ + S : NetworkX graph + The shortest-paths graph corresponding to G. + + Notes + ------ + For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes. + + References + ---------- + [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. + """ + return floydTransformation(G) + +def floydTransformation(G): + """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation. + + Parameters + ---------- + G : NetworkX graph + The graph to be transformed. + + Return + ------ + S : NetworkX graph + The shortest-paths graph corresponding to G. + + References + ---------- + [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. + """ + spMatrix = nx.floyd_warshall_numpy(G) # @todo weight label not considered + S = nx.Graph() + S.add_nodes_from(G.nodes(data=True)) + for i in range(0, G.number_of_nodes()): # matrix indices are used directly as node ids; assumes nodes are labelled 0..n-1 - TODO confirm + for j in range(0, G.number_of_nodes()): + S.add_edge(i, j, cost = spMatrix[i, j]) # every (i, j) pair gets an edge, including i == j + return S diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..8f42f9e --- /dev/null +++ b/tests/README.md @@ -0,0 +1,5 @@ +To use the library : +$> virtualenv --python=/usr/bin/python3.5 venv +$> pip install -r requirements.txt +$> source venv/bin/activate +... 
Go use pygraph diff --git a/tests/opt.py b/tests/opt.py new file mode 100644 index 0000000..f650bbf --- /dev/null +++ b/tests/opt.py @@ -0,0 +1,66 @@ +import ot +import sys +import pathlib +sys.path.insert(0, "../") + +from pygraph.utils.graphfiles import loadDataset +from pygraph.ged.costfunctions import ConstantCostFunction +from pygraph.utils.utils import getSPLengths +from tqdm import tqdm +import numpy as np +from scipy.optimize import linear_sum_assignment +from pygraph.ged.GED import ged +import scipy + +def pad(C, n): # copy C into the top-left corner of an n x n zero matrix + C_pad = np.zeros((n, n)) + C_pad[:C.shape[0], :C.shape[1]] = C + return C_pad + +if (__name__ == "__main__"): + ds_filename = "/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds" # NOTE(review): hard-coded absolute path, adjust before running + dataset, y = loadDataset(ds_filename) + cf = ConstantCostFunction(1, 3, 1, 3) + N = len(dataset) + + pairs = list() + + ged_distances = list() #np.zeros((N, N)) + gw_distances = list() #np.zeros((N, N)) + for i in tqdm(range(0, N)): + for j in tqdm(range(i, N)): + G1 = dataset[i] + G2 = dataset[j] + n = G1.number_of_nodes() + m = G2.number_of_nodes() + if(n == m): # only graph pairs with the same number of nodes are processed + C1 = getSPLengths(G1) + C2 = getSPLengths(G2) + + C1 /= C1.max() # scale each distance matrix by its own maximum + C2 /= C2.max() + + dim = max(n, m) # n == m here, so neither padding branch below can trigger + if(n < m): + C1 = pad(C1, dim) + elif (m < n): + C2 = pad(C2, dim) + + p = ot.unif(dim) + q = ot.unif(dim) + + gw = ot.gromov_wasserstein(C1, C2, p, q, + 'square_loss', epsilon=5e-3) + row_ind, col_ind = linear_sum_assignment(-gw) # negated so the assignment maximises the summed entries of gw + rho = col_ind + varrho = row_ind[np.argsort(col_ind)] + pairs.append((i,j)) + gw_distances.append(ged(G1, G2, cf=cf, rho=rho, varrho=varrho)[0]) + + ged_distances.append(ged(G1, G2, cf=cf)[0]) + + print("Moyenne sur Riesen : {}".format(np.mean(ged_distances))) # "Moyenne" is French for mean + print("Moyenne sur GW : {} ".format(np.mean(gw_distances))) + + np.save("distances_riesen", ged_distances) + np.save("distances_gw", gw_distances) diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..9505125 --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,16 @@ 
+cycler==0.10.0 +Cython==0.27.3 +decorator==4.1.2 +matplotlib==2.1.0 +networkx==2.0 +numpy==1.13.3 +pkg-resources==0.0.0 +POT==0.4.0 +pyparsing==2.2.0 +python-dateutil==2.6.1 +pytz==2017.3 +scikit-learn==0.19.1 +scipy==1.0.0 +six==1.11.0 +sklearn==0.0 +tqdm==4.19.4 diff --git a/utils/utils.py b/utils/utils.py deleted file mode 100644 index d9145b9..0000000 --- a/utils/utils.py +++ /dev/null @@ -1,10 +0,0 @@ -import networkx as nx -import numpy as np - - -def getSPLengths(G1): - sp = nx.shortest_path(G1) - distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes())) - for i in np.keys(): - for j in np[i].keys(): - distances[i, j] = len(sp[i][j])-1