{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "autoscroll": false, "ein.tags": "worksheet-0", "scrolled": true, "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "--- This is a regression problem ---\n", "\n", "1. Loading dataset from file...\n", "\n", "2. Calculating gram matrices. This could take a while...\n", "\n", " --- treelet kernel matrix of size 183 built in 0.4760627746582031 seconds ---\n", "\n", "gram matrix with parameters {} is: \n", "[[1.00000000e+00 5.32055843e-01 2.50000000e-01 ... 1.49248622e-15\n", " 1.35000458e-15 1.58302069e-15]\n", " [5.32055843e-01 1.00000000e+00 2.04124145e-01 ... 1.21860990e-15\n", " 1.10227413e-15 1.29253098e-15]\n", " [2.50000000e-01 2.04124145e-01 1.00000000e+00 ... 3.53553391e-01\n", " 3.19801075e-01 3.75000000e-01]\n", " ...\n", " [1.49248622e-15 1.21860990e-15 3.53553391e-01 ... 1.00000000e+00\n", " 6.56031966e-01 4.82612551e-01]\n", " [1.35000458e-15 1.10227413e-15 3.19801075e-01 ... 6.56031966e-01\n", " 1.00000000e+00 5.18025037e-01]\n", " [1.58302069e-15 1.29253098e-15 3.75000000e-01 ... 4.82612551e-01\n", " 5.18025037e-01 1.00000000e+00]]\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", "calculate performance: 0%| | 1/1230 [00:00<02:46, 7.39it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/ljia/.local/lib/python3.5/site-packages/sklearn/linear_model/ridge.py:154: UserWarning: Singular matrix in solving dual problem. Using least-squares solution instead.\n", " warnings.warn(\"Singular matrix in solving dual problem. Using \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " \n", "4. 
Getting final performances...\n", "\n", "best_params_out: [{}]\n", "best_params_in: [{'alpha': 0.11220184543019636}]\n", "\n", "best_val_perf: 13.861477851137243\n", "best_val_std: 0.8028610844640164\n", "final_performance: 15.37956283368154\n", "final_confidence: 3.5631802486132806\n", "train_performance: 7.331843671597928\n", "train_std: 0.637398126098533\n", "\n", "time to calculate gram matrix with different hyperpapams: 0.48±nan\n", "time to calculate best gram matrix: 0.4760627746582031 s\n", "\n", "params train_perf valid_perf test_perf gram_matrix_time\n", "--------------------- ------------ ------------- ------------- ------------------\n", "{'alpha': '1.00e-02'} 22.70±22.02 81.42±74.74 101.90±112.19 0.48\n", "{'alpha': '1.12e-02'} 26.94±30.78 100.10±120.17 136.55±232.60 0.48\n", "{'alpha': '1.26e-02'} 20.37±16.77 76.50±75.91 93.52±125.63 0.48\n", "{'alpha': '1.41e-02'} 20.16±20.49 68.01±54.55 94.35±166.05 0.48\n", "{'alpha': '1.58e-02'} 15.43±14.61 50.40±46.46 61.88±74.52 0.48\n", "{'alpha': '1.78e-02'} 11.11±6.08 37.92±31.39 39.19±25.19 0.48\n", "{'alpha': '2.00e-02'} 9.89±4.47 30.13±13.90 34.92±25.38 0.48\n", "{'alpha': '2.24e-02'} 9.41±6.38 28.04±21.82 36.17±57.51 0.48\n", "{'alpha': '2.51e-02'} 8.84±6.65 25.46±22.11 33.22±57.93 0.48\n", "{'alpha': '2.82e-02'} 8.48±5.60 24.71±23.41 30.01±46.09 0.48\n", "{'alpha': '3.16e-02'} 7.70±3.36 20.83±12.65 22.88±15.56 0.48\n", "{'alpha': '3.55e-02'} 12.76±32.12 41.43±132.33 32.93±73.73 0.48\n", "{'alpha': '3.98e-02'} 7.15±2.36 17.87±8.64 19.09±7.92 0.48\n", "{'alpha': '4.47e-02'} 6.89±1.43 16.39±4.42 17.78±5.73 0.48\n", "{'alpha': '5.01e-02'} 6.80±1.12 15.59±2.96 16.99±4.76 0.48\n", "{'alpha': '5.62e-02'} 6.77±0.97 15.03±2.20 16.44±4.19 0.48\n", "{'alpha': '6.31e-02'} 6.79±0.87 14.62±1.73 16.03±3.85 0.48\n", "{'alpha': '7.08e-02'} 6.85±0.80 14.33±1.42 15.74±3.65 0.48\n", "{'alpha': '7.94e-02'} 6.93±0.75 14.11±1.19 15.55±3.54 0.48\n", "{'alpha': '8.91e-02'} 7.04±0.70 13.97±1.03 15.43±3.51 0.48\n", "{'alpha': 
'1.00e-01'} 7.17±0.67 13.89±0.90 15.37±3.52 0.48\n", "{'alpha': '1.12e-01'} 7.33±0.64 13.86±0.80 15.38±3.56 0.48\n", "{'alpha': '1.26e-01'} 7.52±0.61 13.88±0.73 15.44±3.63 0.48\n", "{'alpha': '1.41e-01'} 7.73±0.58 13.95±0.67 15.55±3.71 0.48\n", "{'alpha': '1.58e-01'} 7.96±0.56 14.06±0.62 15.71±3.81 0.48\n", "{'alpha': '1.78e-01'} 8.22±0.54 14.22±0.58 15.91±3.90 0.48\n", "{'alpha': '2.00e-01'} 8.51±0.52 14.41±0.55 16.15±4.01 0.48\n", "{'alpha': '2.24e-01'} 8.83±0.49 14.64±0.52 16.44±4.12 0.48\n", "{'alpha': '2.51e-01'} 9.18±0.47 14.91±0.49 16.77±4.23 0.48\n", "{'alpha': '2.82e-01'} 9.57±0.45 15.22±0.47 17.15±4.34 0.48\n", "{'alpha': '3.16e-01'} 9.99±0.43 15.56±0.45 17.56±4.45 0.48\n", "{'alpha': '3.55e-01'} 10.44±0.41 15.95±0.43 18.03±4.57 0.48\n", "{'alpha': '3.98e-01'} 10.94±0.39 16.38±0.42 18.53±4.69 0.48\n", "{'alpha': '4.47e-01'} 11.48±0.38 16.86±0.41 19.09±4.81 0.48\n", "{'alpha': '5.01e-01'} 12.07±0.36 17.38±0.40 19.69±4.93 0.48\n", "{'alpha': '5.62e-01'} 12.71±0.34 17.95±0.40 20.34±5.06 0.48\n", "{'alpha': '6.31e-01'} 13.40±0.33 18.57±0.40 21.04±5.18 0.48\n", "{'alpha': '7.08e-01'} 14.15±0.31 19.24±0.41 21.79±5.31 0.48\n", "{'alpha': '7.94e-01'} 14.96±0.30 19.96±0.41 22.60±5.44 0.48\n", "{'alpha': '8.91e-01'} 15.83±0.29 20.75±0.42 23.47±5.58 0.48\n", "{'alpha': '1.00e+00'} 16.76±0.28 21.59±0.43 24.39±5.71 0.48\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:135: RuntimeWarning: Degrees of freedom <= 0 for slice\n", " keepdims=keepdims)\n", "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:127: RuntimeWarning: invalid value encountered in double_scalars\n", " ret = ret.dtype.type(ret / rcount)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\r", "calculate performance: 100%|██████████| 1230/1230 [02:10<00:00, 11.01it/s]" ] } ], "source": [ "%load_ext line_profiler\n", "%matplotlib inline\n", "import numpy as np\n", "import sys\n", "sys.path.insert(0, 
\"../\")\n", "from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel\n", "from pygraph.kernels.treeletKernel import treeletkernel\n", "\n", "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", "estimator = treeletkernel\n", "param_grid_precomputed = {}\n", "param_grid = {'alpha': np.logspace(-2, 0, num = 41, base = 10)}\n", "\n", "model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, \n", " 'regression', NUM_TRIALS=30)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "autoscroll": false, "ein.tags": "worksheet-0", "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "%load_ext line_profiler\n", "\n", "import sys\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.utils import kernel_train_test\n", "from pygraph.kernels.treeletKernel import treeletkernel\n", "\n", "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", "\n", "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n", "\n", "kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n", "\n", "# %lprun -f treeletkernel \\\n", "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "autoscroll": false, "ein.tags": "worksheet-0", "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "# results\n", "\n", "# with y normalization\n", " RMSE_test std_test RMSE_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 8.3079 3.37838 2.90887 1.2679 0.500302\n", "\n", "# without y normalization\n", " RMSE_test std_test RMSE_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 10.0997 5.03584 2.68803 1.54162 0.484171\n", "\n", " \n", "\n", "# G0 -> WL subtree h = 
0\n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 13.9223 2.88611 13.373 0.653301 0.186731\n", "\n", "# G0 U G1 U G6 U G8 U G13 -> WL subtree h = 1\n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 8.97706 2.90771 6.7343 1.17505 0.223171\n", " \n", "# all patterns \\ { G3 U G4 U G5 U G10 } -> WL subtree h = 2 \n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 7.31274 1.96289 3.73909 0.406267 0.294902\n", "\n", "# all patterns \\ { G4 U G5 } -> WL subtree h = 3\n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 8.39977 2.78309 3.8606 1.58686 0.348912\n", "\n", "# all patterns \\ { G5 } \n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 9.47647 4.22113 3.18029 1.5669 0.423638\n", " \n", " \n", " \n", "# G0, -> WL subtree h = 0\n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 13.9223 2.88611 13.373 0.653301 0.186731 \n", " \n", "# G0 U G1 U G2 U G6 U G8 U G13 -> WL subtree h = 1\n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 8.62431 2.54327 5.63422 0.255002 0.290797\n", " \n", "# all patterns \\ { G5 U G10 } -> WL subtree h = 2\n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", " 10.1294 3.50275 3.69664 1.55116 0.418498" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "autoscroll": false, "ein.tags": "worksheet-0", "scrolled": true, "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 
'O'}\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD8CAYAAACfF6SlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xt8VNW99/HPj4uQSEmDgiLh1gKKllA1oa2HAxwBBbxSvFdNePTQRtHTamiltdpjtbG8Yo/FYqrHWlJR1AdsykNBBQNYUSGIaRAVRLwFpQmK8ZKAEtbzx57EYTKTC9lzSeb7fr3mldlrr733L8Pw2ztrr72WOecQEZHk0iXeAYiISOwp+YuIJCElfxGRJKTkLyKShJT8RUSSkJK/iEgSUvIXEUlCSv4iIklIyV9EJAl1i3cAkRx99NFuyJAh8Q5DRKRDeemll/Y45/q2VC9hk/+QIUPYtGlTvMMQEelQzOyd1tRTs4+ISBJS8hcRSUJK/iIiSUjJX0QkCSn5i4gkISV/EZEkpOQvIpKElPxFRJJQwj7kJXFWVQULF0JFBdTUQFoaZGbCzJnQt8WHB0UkwSn5y6HKyqCgAFau9Jb37ftq3RNPwK23wtSpMHcuZGfHJ0YRaTc1+8hXiopgwgQoKfGSfnDiB6ir88pKSrx6RUXxiFJEfKArf/EUFUF+PtTWtlzXOa9efr63nJcX3dhExHe68hevqSdM4v8ImA4cCQwGHgndruEEoAH4RDocJX/x2vjr6poUXwscAfwLeBjIA7aGVqqr87YXkQ7Fl+RvZg+aWZWZvRJhvZnZfDPbYWYVZnaKH8cVH1RVeTd3nTuk+HNgKfBroBcwFjgXeCh0e+dgxQqoro5BsCLiF7+u/BcCU5pZPxUYHnjNAnSnMFEsXBi2eDveDaERQWWjCXPlD2AWcT8ikph8Sf7OuWfxmogjOQ/4i/O8CHzdzPr7cWxpp4qKpr16gM+A3iFlacCn4fZRVwdbtvgfm4hETaza/AcA7wUtVwbKDmFms8xsk5ltqlYzQmzU1IQt7gV8ElL2CfC1SPvZu9e/mEQk6hLqhq9z7n7nXJZzLquvniKNjbS0sMUjgAPAG0Fl/wROirSf9HRfwxKR6IpV8t8FDAxazgiUSbxlZkLPnk2KjwS+D9yCd/N3PfA34Ipw+0hJgVGjohikiPgtVsl/GXBloNfPd4Ea59wHMTq2NCc3l4MHD4ZddS9QB/QDLsW7Sx/2yt85yM2NUoAiEg2+POFrZouBCcDRZlYJ3Ap0B3DO/RFYAUwDdgC1wEw/jivts2fPHn5+882cb8YUM7qEdPfsA5S0tBMzmDZNg72JdDB+9fa51DnX3znX3TmX4Zz7k3Puj4HET6CXz7XOuW8650Y55/RIaBwdOHCABQsWcOKJJ5Kamsq/r1hBl5SUw9tZSoo3yJuIdCga2yfJ/OMf/2D27Nn06dOHZ555hlENbfWFha0f26dBaqq3XVZWdIIVkahR8k8S77//PnPmzOHZZ5+lsLCQiy66CDP7qkLD4Gz5+V6//ZAmoEOYeVf8hYUa1E2kg0qorp7ivy+++IJ58+aRmZnJkCFDeP3117n44osPTfwN8vJg3TqYPt3rARTaFJSS4pVPn+7VU+IX6bB05d+JPfXUU1x//fUMGzaMF198kWHDhrW8UVYWLF3qjdWzcKH35O7evV4//lGjvF49urkr0uEp+XdCb731FjfccANbtmzh7rvv5uyzz277Tvr2hTlz/A9ORBKCmn06kdraWm699Vays7PJysrilVdeObzELyKdnq78OwHnHCUlJfzkJz9hzJgxbN68mUGDBsU7LBFJYEr+Hdzrr7/O9ddfz65du3jwwQc5/fTT4x2SiHQAavbpoD755BPmzJnD2LFjmTZtGuXl5Ur8ItJqSv4djHOORYsWMXLkSPbs2cPWrVv58
Y9/TPfu3eMdmoh0IGr26UDKy8uZPXs2+/btY+nSpXz3u9+Nd0gi0kHpyr8D+Oijj7jmmms488wzycnJYcOGDUr8ItIuuvKPhaoq74Gpigpv5qy0NG8c/Zkzm31gqr6+ngceeIBbbrmFCy+8kNdee40+ffrELm4R6bSU/KOprAwKCmDlSm85eK7cJ56AW2+FqVO9UTGzsw/Z9IUXXmD27Nmkpqby9NNPM3r06BgGLiKdnZp9oqWoCCZMgJISL+mHTpJeV+eVlZR49YqKANi9eze5ublceOGF3HjjjTz77LNK/CLiOyX/aCgq+mp4ZOfYD1wFDMabAP3bwMqGus5BbS0uP5/VF1zAt771LY455hhee+01LrvssvADsImItJOaffxWVtZkXPwDeBMYrwMG4U1rdhGwBRgSqGO1tYz961/Z9NhjDLnggtjGLCJJR1f+fiso8Jp0ghwJ/Aov0XcBzgaGAi+FbNrDOYYsXhz9GEUk6Sn5+6mqyru529xEKMC/gO00nQzdnIMVK7zhlEVEokjJ308LF7ZY5UvgB0AOcEK4Cmat2o+ISHso+fupoqJpr54gB4ErgCOAP0SqVFfnTaAiIhJFuuHrp5qaiKscXo+ff+Hd8G12JJ69e30NS0QklJK/n9LSIq7KA14DVgMpEWsFpKf7F5OISBhq9vFTZqY3wXmId4D7gHLgWKBX4PVwuH2kpHhz5YqIRJGSv59yc8MWD8Zr9tkHfBb0+kG4ys5F3I+IiF+U/P3Ur583Vs/hPpVrBtOmNTvYm4iIH5T8/TZ3rtd0czhSUrztRUSiTMnfb9nZUFgIqalt2y411dsuKys6cYmIBFFvn2jIywOg/ic/gf376dpcXTPvir+wsHE7EZFo05V/tOTlcc1JJ7ErK8vrARTaFJSS4pVPnw7r1inxi0hM6co/StavX8+qvXtZsH2799DWwoXek7t793r9+EeN8nr16OauiMSBkn+UFBQU8LOf/Yxu3bp5CX7OnHiHJCLSSMk/CsrLy3n55ZdZsmRJvEMREQlLbf5RUFBQwA033EDPME/7iogkAl35+2z79u2Ulpbypz/9Kd6hiIhEpCt/n/32t79l9uzZ9OrVK96hiIhE5EvyN7MpZrbNzHaY2U1h1ueaWbWZlQdeV/tx3ETz7rvvUlJSwnXXXRfvUEREmtXuZh8z6wosACYDlUCZmS1zzr0aUvUx59zs9h4vkd11111cddVV9OnTJ96hiIg0y482/zHADufcTgAzexQ4DwhN/p1aVVUVDz30EFu3bo13KCIiLfKj2WcA8F7QcmWgLNQMM6swsyVmNtCH4yaUu+++m0suuYT+/fvHOxQRkRbFqrfP/wMWO+f2m9kPgWLg9NBKZjYLmAUwaNCgGIXWfh9//DH33XcfmzZtincoIiKt4seV/y4g+Eo+I1DWyDn3oXNuf2DxAeDUcDtyzt3vnMtyzmX17UDDHtx7772cddZZDB06NN6hiIi0ih9X/mXAcDMbipf0LwEuC65gZv2dcx8EFs/Fm862U6itreX3v/89a9eujXcoIiKt1u7k75w7YGazgaeArsCDzrmtZnYbsMk5twy43szOBQ4AHwG57T1uzFVVeYOzVVRATY03WXtmJou+/JKxY8cycuTIeEcoItJq5pyLdwxhZWVluYRoQy8rg4ICWLnSW963r3GVS0lhf10ddRMmkD5vnjeRi4hIHJnZS865FmeF0hO+zSkqggkToKTES/pBiR/A6uroCaSvW+fVKyqKR5QiIm2msX0iKSqC/HyorW25rnNevfx8b1kTs4hIgtOVfzhlZWET/wSgJ9Ar8Do+dLuGE0AiNFeJiDRDyT+cggKoqwu76g/AZ4HXtnAV6uq87UVEEpiSf6iqKu/m7uHeCHcOVqyA6mp/4xIR8ZGSf6iFC5tdPRc4Gvg3YG2kSmYt7kdEJJ6U/ENVVDTp1dPgt8BOvCfZZgHnAG+Gq1hX503WLiKSoJT8Q9XURFz1HeBrQA8gB
+/qf0Wkynv3+h2ZiIhvlPxDpaW1uqoBEe8MpKf7EY2ISFQo+YfKzIQwE69/jDd+xT68MSoeBp4FpoTbR0oKjBoVxSBFRNpHyT9Ubm7Y4i+Bm4G+eDd87wFKgBHhKjsXcT8iIolAyT9Uv34wdarXYydIX7zhSz/F+yvgRbx5K5swg2nToAMNSS0iyUfJP5y5c72mm8ORkuJtLyKSwJT8w8nOhsJCSE1t23apqd52WS0OqCciElca2C2ShsHZ8vO9fvvNPPFbD7gjjqBbYaEGdRORDkFX/s3Jy4N162D6dK8HUGhTUEoK9OxJzX/8B9OOPJKqGTPiE6eISBtpMpfWqq72hmzYssV7gCs93evOmZsLffuSn5/P+++/zyOPPBLvSEUkibV2Mhclf5/U1tYyatQo5s+fz1lnnRXvcEQkSWkmrxhLTU3l/vvvJy8vj08//TTe4YiINEvJ30cTJ05k0qRJzFVXTxFJcEr+Prvrrrt44oknWL9+fbxDERGJSMnfZ+np6cyfP5+rr76a/fv3xzscEZGwlPyjYMaMGRx//PHccccd8Q5FRCQsJf8oMDMWLFhAUVERWzSpi4gkICX/KBkwYAB33HEHV199NfX19fEOR0TkEEr+UXT11VfTs2dP7rnnnniHIiJyCCX/KOrSpQv/+7//y+23385bb70V73BERBop+UfZiBEjuPHGG/nRj35Eoj5NLSLJR8k/BvLz86mqquKhhx6KdygiIoCSf0x0796dBx54gDlz5lBVVRXvcERElPxj5dRTT+XKK6/kv/7rv+IdioiIJnOJpf/+7/8mMzOT5cuXc/bZZ0NVlTdMdEUF1NRAWhpkZsLMmZoDWESiSkM6x1hpaSm/u/RS/jpmDN1Xr/YK9+37qkJKijdr2NSp3lzA2dnxCVREOiQN6ZygTt+2jaUffkjX5cu9pB+c+MGbMnLfPigpgQkToKgoLnGKSOemZp9YKiqC/Hx6tOaJX+egttabQxg0N7CI+EpX/rFSVuYl8trasKvfAHoCl4euaDgBdMImMBGJH1+Sv5lNMbNtZrbDzG4Ks76HmT0WWL/BzIb4cdwOpaDAa9KJ4FogYut+XZ23vYiIT9qd/M2sK7AAmAqcCFxqZieGVLsK2OucGwb8D/Db9h63Q6mqgpUrvaacMB4Fvg5MjLS9c7BihTeJvIiID/y48h8D7HDO7XTOfYGXy84LqXMeUBx4vwSYaGbmw7E7hoULI676BLgF+F1L+zBrdj8iIm3hR/IfALwXtFwZKAtbxzl3AKgBjvLh2B1DRUXTXj0Bv8T7syijpX3U1YHmBhARnyRUbx8zmwXMAhg0aFCco/FRTU3Y4nJgNfBya/ezd69PAYlIsvMj+e8CBgYtZwTKwtWpNLNuQBrwYeiOnHP3A/eD95CXD7ElhrS0sMVrgbeBhtPcZ0A98CqwOdwG6em+hyYiycmPZp8yYLiZDTWzI4BLgGUhdZYBOYH3FwClLlEfLY6GzEzo2bNJ8SzgTby/AMqBHwFnAU+F20dKCowaFcUgRSSZtDv5B9rwZ+PlrNeAx51zW83sNjM7N1DtT8BRZrYDuAFo0h20U8vNDVucChwb9OqF19c/7Kg+zkXcj4hIW/nS5u+cWwGsCCm7Jej9PuBCP47VIfXr543VU1ISsbsnwK8ilNcDz/fuTe/332e0BnwTER/oCd9YmTvXa7o5DF1SU/kgJ4czzjiD3Nxc3nvvvZY3EhFphpJ/rGRnQ2EhpKa2bbvUVKywkIvmzWP79u0cd9xxjB49mrlz51IToReRiEhLlPxjKS/vqxNAS8+4mXn1CgsbB3VLS0vjN7/5DRUVFezevZsRI0Zwzz338MUXX8QgeBHpTJT8Yy0vD9atg+nTvR5AoU1BKSle+fTpXr0wo3lmZGTw5z//maeffprly5dz0kknsXTpUk0QLyKtpslc4qm62huyYcsW7wGu9HSvO2dubptm8nr66aeZM2cORx55JIWFhZx22mlRC1lEE
ltrJ3NR8u8k6uvrWbRoETfffDNjxozhzjvvZPjw4fEOS0RiTDN5JZmuXbuSk5PD9u3bycrK4nvf+x7XXXcd1RoJVETCUPLvZFJSUpg7dy6vvfYaZsbIkSP5zW9+Q22ESWREJDkp+XdSffv2Zf78+bzwwgts3ryZ448/noULF1LfmikkRaTTU/Lv5IYPH86SJUt47LHHuP/++znllFN4+umn4x2WiMSZkn+SOO2001i/fj233HIL1157LWeeeSb//Oc/4x2WiMSJkn8SMTNmzJjBq6++yjnnnNM4XERlZWW8QxORGFPyT0Ldu3dn9uzZhwwX8fOf/1zDRYgkESX/JNYwXER5eTnvv/++hosQSSJK/sLAgQNZuHChhosQSSJK/tJo9OjRPPXUUyxYsIDbbruNsWPH8vzzz8c7LBGJAiV/aeKMM85g8+bNzJo1i4svvpgZM2bwxhtvxDssEfGRkr+EpeEiRDo3JX9ploaLEOmclPylVXwZLqKqCubNg8svh3PO8X7Om+cNbS0iMaUhneWwPP/88+Tn51NbW8u8efM444wzIlcuK4OCAli50lvet++rdSkp3qT2U6d68xxnZ0c3cJFOTkM6S1Q1DBfxy1/+svnhIoqKYMIEKCnxkn5w4geoq/PKSkq8ekVFsQhfJOkp+ctha3G4iKIiyM+H2lrv6r45znn18vN1AhCJASV/abdww0UsyM3F3Xijl9BDPAqMBI4Evgn8I3hlwwlATX4iUaXkL74JHi7iu2vWcLCurkmdVcDPgD8DnwLPAt8IrVRX590jEJGo6RbvAKTzGdijBwOrqsKuuxW4BfhuYHlAuErOwYoVXi+gNkxkLyKtpyt/8d/ChWGL64FNQDUwDMgAZgNN/z4AzCLuR0TaT8lf/FdR0bRXD/Av4EtgCV47fznwMnB7uH3U1cGWLVEMUiS5KfmL/yLMC5AS+Hkd0B84GrgBWBFpP3v3+h2ZiAQo+Yv/0tLCFqfjNfVYUJmFrdmwQbp/MYnIIZT8xX+ZmdCzZ9hVM4F7gCpgL/A/wNlh6h3s0QNGjYpaiCLJTslf/JebG3HVL4FsYAReX/+TgV+EqffF/v2c8cgjLFiwgA8//DAaUYokNSV/8V+/ft5YPda0Uac7cC/wMbAbmA80+RvBjCPOP58bCgp47rnn+OY3v8mMGTNYtmwZX375ZbSjF0kKSv4SHXPneoO2HY6UFLr84hdMmTKFxYsX88477zBlyhTmzZtHRkYGP/7xj3n55Zc1zaRIOyj5S3RkZ0NhIaSmtm271FRvu6yvBiVMS0vjP//zP3nuuedYv349vXv3Zvr06YwePZq77rqLDz74wOfgRTo/JX+Jnry8r04AYZqADmH2VeLPy4tYbdiwYdx2223s3LmTe+65h61bt3LiiScybdo0HnvsMfaFeb5ARJpqV/I3sz5mtsrM3gj8DNs3z8zqzaw88FrWnmNKB5OXB+vWwfTpXg+g0KaglBSvfPp0r14ziT9Yly5dGD9+PA8++CCVlZVcdtllPPDAAwwYMIAf/vCHPP/882oWEmlGuyZzMbN5wEfOuTvN7CYg3Tn3szD1PnPO9WrLvjWZSydUXe0N2bBli/cAV3q6150zN9e3MXzee+89Fi1aRHFxMQcPHuTKK6/kiiuuYPDgwb7sXyTRtXYyl/Ym/23ABOfcB2bWH1jrnDs+TD0lf4kp5xwbN26kuLiYxx9/nMzMTHJycpgxYwa9erXpqyjSocRqJq9jnHMNd9t2A8dEqNfTzDaZ2Ytmdn47jynSIjPjO9/5Dvfeey+7du3immuuYenSpQwcOJCcnBxKS0s5ePBgvMMUiZsWr/zNbDVwbJhVvwCKnXNfD6q71znXpN3fzAY453aZ2TeAUmCic+7NMPVmAbMABg0adOo777zTpl9GpCVVVVU88sgjFBcX8+GHH3LFFVeQk5PDiBEj4h2aiC8SqtknZJuFwHLn3JLm6qnZR6KtoqKC4uJiHn74YYYOHUpOT
g4XX3wx6RpTSDqwWDX7LANyAu9zgL+FCSTdzHoE3h8N/BvwajuPK9JumZmZ3HXXXVRWVnLzzTdTWlrKkCFDuOiii/j73//OgQMH4h2iSNS0N/nfCUw2szeASYFlzCzLzB4I1BkJbDKzfwJrgDudc0r+kjC6devGWWedxeOPP87bb7/N6aefzu23305GRgY33ngjFRUV8Q5RxHftavaJJjX7SLxt27aNv/zlLzz00EP06dOHnJwcfvCDH9CvX794hyYSUayafUQ6reOPP5477riDt99+m9/97neUl5czYsQIzjnnHJYsWcL+/fvjHaLIYVPyF2lBly5dOP300ykuLqayspILLriAe++9lwEDBnDNNdewYcMGPU0sHY6Sv0gb9OrVq/E5gZdeeonjjjuOyy+/nJEjR1JQUEBlZWW8QxRpFSV/kcM0ePBgbr75ZrZv386DDz7I22+/TWZmJpMnT2bRokV8/vnn8Q5RJCIlf5F2MjNOO+007rvvPnbt2sXVV1/N4sWLycjIYObMmaxdu1ZPE0vCUW8fkSjZvXs3Dz/8MMXFxXz66adcccUVXHnllQwbNqz9O6+q8gbJq6iAmhpIS/PmTp4507dB8qRjiskTvtGk5C+dhXOO8vJyiouLWbx4McOHDycnJ4eLLrqItLS0tu2srAwKCmDlSm85eP6ClBRwzptCc+5cb0IdSTrq6imSIMyMk08+mbvvvpvKykp++tOf8uSTTzJ48GAuvfRSnnzySerr61veUVERTJgAJSVe0g+duKauzisrKfHqFRVF49eRTkLJXySGunfvzrnnnsvSpUt58803GTt2LLfccgsDBw7kpz/9Ka+88kr4DYuKID8famu9q/vmOOfVy8/XCUAiUvIXiZOjjjqKa6+9lo0bN/LMM8/QpUsXpkyZwqmnnsr8+fPZs2ePV7Gs7KvEH+RyoD/QGxgBPBCy/8YTgJpPJQy1+YskkPr6ekpLSykuLmb58uVMmDCBe3fvpv/GjVjI/9WtwDCgB/A6MAH4O3BqcCUzb4rMpUtj8wtI3KnNX6QD6tq1a+NzAu+++y4XTZjAUWESP8BJeIkfwAKvJpNkOAcrVnhTaIoEUfIXSVC9e/fmsi++oEePHhHrXAOkAifgNQFNC1fJzOsWKhJEyV8kkVVUNO3VE+Re4FPgH8D3+eovgUPU1cGWLVEJTzouJX+RRFZT02KVrsBYoBKI2Ldn717/YpJOQclfJJG14SGwA4Rp8w/4slcvX8KRzkPJXySRZWZCz55NiquAR4HPgHrgKWAxMDHMLvZ16cKvnniC8ePH8+tf/5oXXnhBU1SKkr9IQsvNDVtseE08GUA6kA/cDZwbpm7PI47g59u2MXfuXGpqasjLy+Poo4/m/PPP5w9/+AOvv/665iNIQurnL5Lovv99b8iGw/m/GqGff1VVFc888wyrV69m1apVOOeYNGkSkyZNYuLEiRx77LE+BS+xpoHdRDqLsjJvrJ6QJ3xbJTUV1q2DrMi5wDnHjh07Gk8Ea9asISMjg8mTJzNp0iTGjRtHL90z6DCU/EU6k+CxfVorNRUKCyEvr02Hqq+v56WXXmo8GZSVlXHKKacwadIkJk+eTHZ2Nt26dWvjLyCxouQv0tk0nADq6ppvAjLzhnc+jMQfzueff85zzz3H6tWrWb16NW+99Rbjx49vbCY64YQTMLN2H0f8oeQv0hlt2uSN579ihZfk6+q+Wtcwnv+0ad54/s009bRHVVUVpaWljX8Z1NfXH3K/oH///lE5rrSOkr9IZ1Zd7Q3ZsGWL9wBXejqMGuX1DorhTF7OOd58801WrVrF6tWrWbNmDQMGDGg8GYwbN46vfe1rMYtHlPxFJA7q6+vZvHlz48lg48aNnHzyyY03j7Ozs+nevXu8w+zUlPxFJO5qa2t57rnnGk8Gb731FuPGjWs8Geh+gf+U/EUk4VRXVx9yv+DLL79sbCKaNGmS7hf4QMlfRBJaw/2Chl5EpaWlHHfcc
Y0ngvHjx+t+wWFQ8heRDqXhfkHDyWDjxo18+9vfbjwZjBkzRvcLWkHJX0Q6tIb7BQ0ng507dzJu3LjGk8HIkSN1vyAMJX8R6VSqq6tZs2YNq1atanK/YOLEiRx33HHRD6KqyutiW1HhzbWQluaNvDpzZky72DZHyV9EOi3nHDt37my8cVxaWkr//v0bexH5fr+grMx7uG7lSm85eHa1hofrpk71Hq7LzvbvuIdByV9EkkZ9fT0vv/xyYxPRhg0bGD16dON4RO26XxCnYTUOl5K/iCSt2tpa1q9f33gy2LFjR+P9gsmTJ7f+fkEMB9Tzi5K/iEjAnj17Dnm+YP/+/YfcLxgwYEDTjcIMpf0HYCGwBbg08D6sVgylHS1K/iIiEezcubPxqePS0lKOPfbYQ54v6N27d9hJdJ7Am/7wKaCOZpJ/hEl0YkHJX0SkFerr6ykvL288GWzYsIFxJ5xASXk53SPMdXwzUEkzyR+8uZfffTfmvYBam/zbNYevmV1oZlvN7KCZRTyYmU0xs21mtsPMbmrPMUVE/NS1a1dOPfVUbrrpJlavXk1VVRX/8+1vt3/HZl630ATV3gncXwG+DzwbqYKZdQUWAFOBE4FLzezEdh5XRCQqUlJSGFFXF/Gqv9Xq6rwhtxNUu+Zic869BrR013wMsMM5tzNQ91HgPODV9hxbRCRqamr82c/evf7sJwrae+XfGgOA94KWKwNlTZjZLDPbZGabqqurYxCaiEgYaWn+7Cc93Z/9REGLyd/MVpvZK2Fe5/kdjHPufudclnMuq2+CPCotIkkoM9O7YRviALAPqA+89gXKwkpJ8WZXS1AtJn/n3CTn3LfCvP7WymPsAgYGLWcEykREElNubtji24EU4E5gUeD97ZH24VzE/SSCWDT7lAHDzWyomR0BXAIsi8FxRUQOT79+3lg9IfczfwW4kNevwm1vBtOmJcxgb+G0t6vndDOrBL4H/N3MngqUH2dmKwCccweA2XjPRbwGPO6c29q+sEVEomzuXK/p5nCkpHjbJ7B2JX/n3F+dcxnOuR7OuWOcc2cGyt93zk0LqrfCOTfCOfdN59wd7Q1aRCTqsrO9MXpSU9u2XcPYPnEY2qEt2tXVU0SkU2sYnK0DjerZWrFo8xcR6bjy8rxB2qZP93onw8/IAAAFXElEQVQAhTYFpaR45dOne/U6QOIHXfmLiLQsK8sbpK262huyYcsW7wGu9HSvO2dubkLf3A1HyV9EpLX69oU5c+IdhS/U7CMikoSU/EVEkpCSv4hIElLyFxFJQkr+IiJJSMlfRCQJKfmLiCQhJX8RkSRkrrmxKuLIzKqBd+IdB3A0sCfeQbRRR4wZOmbcijk2FHPrDXbOtfi4ccIm/0RhZpucc4k9PF+IjhgzdMy4FXNsKGb/qdlHRCQJKfmLiCQhJf+W3R/vAA5DR4wZOmbcijk2FLPP1OYvIpKEdOUvIpKElPwBM+tjZqvM7I3Az/QI9Z40s4/NbHlI+VAz22BmO8zsMTM7IoFizgnUecPMcoLK15rZNjMrD7z6RTHWKYFj7TCzm8Ks7xH43HYEPschQevmBsq3mdmZ0YrRr5jNbIiZ1QV9rn9MoJjHmdlmMztgZheErAv7PUnwmOuDPudlsYo5cOyW4r7BzF41swoze8bMBgeti8tn3YRzLulfwDzgpsD7m4DfRqg3ETgHWB5S/jhwSeD9H4G8RIgZ6APsDPxMD7xPD6xbC2TFIM6uwJvAN4AjgH8CJ4bUuQb4Y+D9JcBjgfcnBur3AIYG9tM1wWMeArwSh+9wa2IeAmQCfwEuaM33JFFjDqz7LNafcxvi/g8gNfA+L+j7EZfPOtxLV/6e84DiwPti4PxwlZxzzwCfBpeZmQGnA0ta2t5nrYn5TGCVc+4j59xeYBUwJQaxBRsD7HDO7XTOfQE8ihd7sODfZQkwMfC5ngc86pzb75x7C9gR2F8ixxwvLcbsnHvbOVcBHAzZNl7fk/bEHE+tiXuNc
642sPgikBF4nwj/JwE1+zQ4xjn3QeD9buCYNmx7FPCxc+5AYLkSGOBncBG0JuYBwHtBy6Gx/TnwJ/Mvo5i4WorhkDqBz7EG73NtzbbR0J6YAYaa2ctmts7M/j3awYbGE9CWzyqRP+fm9DSzTWb2opnF4oKrQVvjvgpYeZjbRk3SzOFrZquBY8Os+kXwgnPOmVlCdIGKcsw/cM7tMrOvAUuBK/D+tJb2+QAY5Jz70MxOBUrM7CTn3CfxDqwTGhz4Dn8DKDWzLc65N+MdVDAzuxzIAsbHO5ZQSZP8nXOTIq0zs3+ZWX/n3Adm1h+oasOuPwS+bmbdAleAGcCudoYL+BLzLmBC0HIGXls/zrldgZ+fmtkjeH/KRiP57wIGhsQQ+vk01Kk0s25AGt7n2ppto+GwY3Zew+5+AOfcS2b2JjAC2JQAMTe37YSQbdf6ElXLxz3sf9+g7/BOM1sLnIzXFh9trYrbzCbhXaiNd87tD9p2Qsi2a6MSZQvU7ONZBjTcdc8B/tbaDQP/2dcADT0R2rR9O7Qm5qeAM8wsPdAb6AzgKTPrZmZHA5hZd+Bs4JUoxVkGDDevR9QReDdHQ3tmBP8uFwClgc91GXBJoGfNUGA4sDFKcfoSs5n1NbOuAIEr0uF4N/USIeZIwn5PohRnsMOOORBrj8D7o4F/A16NWqSHajFuMzsZuA841zkXfGEWr8+6qXjcZU60F15b7TPAG8BqoE+gPAt4IKjeP4BqoA6vre7MQPk38JLSDuD/Aj0SKOb/E4hrBzAzUHYk8BJQAWwFfk8Ue9EA04DteFdlvwiU3Yb3HwOgZ+Bz2xH4HL8RtO0vAtttA6bG8DtxWDEDMwKfaTmwGTgngWLODnxvP8f7y2prc9+TRI4ZOA3YgtfTZgtwVaxibmXcq4F/Bb4H5cCyeH/WoS894SsikoTU7CMikoSU/EVEkpCSv4hIElLyFxFJQkr+IiJJSMlfRCQJKfmLiCQhJX8RkST0/wF2+3k3Jt9iaAAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'C', 6: 'O', 7: 'O'}\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " pattern 0: [0, 1, 2, 3, 4, 5, 6, 7]\n", " treelet 0: ['C', 'C', 'C', 'C', 'C', 'C', 'O', 'O']\n", "\n", " pattern 1 : [[4, 0], [4, 1], [5, 4], [6, 2], [6, 5], [7, 3], [7, 5]]\n", " treelet 1 : ['1C1C', '1C1C', '1C1C', '1C1O', '1C1O', '1C1O', '1C1O']\n", "\n", " pattern 2 : [[1, 4, 0], [5, 4, 0], [5, 4, 1], [5, 6, 2], [5, 7, 3], [6, 5, 4], [7, 5, 4], [7, 5, 6]]\n", " treelet 2 : ['2C1C1C', '2C1C1C', '2C1C1C', '2C1O1C', '2C1O1C', '2C1C1O', '2C1C1O', '2O1C1O']\n", "\n", " pattern 3 : [[4, 5, 6, 2], [4, 5, 7, 3], [6, 5, 4, 0], [6, 5, 4, 1], [6, 5, 7, 3], [7, 5, 4, 0], [7, 5, 4, 1], [7, 5, 6, 
2]]\n", " treelet 3 : ['3C1C1O1C', '3C1C1O1C', '3C1C1C1O', '3C1C1C1O', '3C1O1C1O', '3C1C1C1O', '3C1C1C1O', '3C1O1C1O']\n", "\n", " pattern 4 : [[2, 6, 5, 4, 0], [2, 6, 5, 4, 1], [3, 7, 5, 4, 0], [3, 7, 5, 4, 1], [3, 7, 5, 6, 2]]\n", " treelet 4 : ['4C1C1C1O1C', '4C1C1C1O1C', '4C1C1C1O1C', '4C1C1C1O1C', '4C1O1C1O1C']\n", "\n", " pattern 5 : []\n", " treelet 5 : []\n", "\n", " pattern 3 star: [[4, 0, 1, 5], [5, 4, 6, 7]]\n", " treelet 3 star: ['6CC1C1C1', '6CC1O1O1']\n", "\n", " pattern 4 star: []\n", " treelet 4 star: []\n", "\n", " pattern 5 star: []\n", " treelet 5 star: []\n", "\n", " pattern 7: [[4, 0, 1, 5, 6], [4, 0, 1, 5, 7], [5, 7, 6, 4, 0], [5, 7, 6, 4, 1], [5, 4, 7, 6, 2], [5, 4, 6, 7, 3]]\n", " treelet 7: ['7CC1C1C1O1', '7CC1C1C1O1', '7CO1O1C1C1', '7CO1O1C1C1', '7CC1O1O1C1', '7CC1O1O1C1']\n", "\n", " pattern 11: []\n", " treelet 11: []\n", "\n", " pattern 10: [[4, 0, 1, 5, 6, 2], [4, 0, 1, 5, 7, 3]]\n", " treelet 10: ['aCO1C1C1C1C1', 'aCO1C1C1C1C1']\n", "\n", " pattern 12: [[4, 0, 1, 5, 7, 6]]\n", " treelet 12: ['cCC1C1C1O1O1']\n", "\n", " pattern 9: [[5, 7, 6, 4, 2, 0], [5, 7, 6, 4, 2, 1], [5, 6, 7, 4, 3, 0], [5, 6, 7, 4, 3, 1], [5, 4, 7, 6, 3, 2]]\n", " treelet 9: ['9CO1C1O1C1C1', '9CO1C1O1C1C1', '9CO1C1O1C1C1', '9CO1C1O1C1C1', '9CC1O1O1C1C1']\n", "\n", " numbers of canonical keys: {'2O1C1O': 1, '7CC1C1C1O1': 2, '7CC1O1O1C1': 2, 'aCO1C1C1C1C1': 2, '2C1C1C': 3, '6CC1C1C1': 1, '9CO1C1O1C1C1': 4, '1C1C': 3, '3C1C1C1O': 4, '4C1C1C1O1C': 4, '7CO1O1C1C1': 2, '2C1C1O': 2, '1C1O': 4, '9CC1O1O1C1C1': 1, '3C1C1O1C': 2, '6CC1O1O1': 1, '2C1O1C': 2, '0O': 2, '4C1O1C1O1C': 1, 'cCC1C1C1O1O1': 1, '0C': 6, '3C1O1C1O': 2}\n", "\n", " pattern 0: [0, 1, 2, 3, 4, 5, 6]\n", " treelet 0: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", "\n", " pattern 1 : [[2, 0], [3, 1], [5, 2], [5, 4], [6, 3], [6, 4]]\n", " treelet 1 : ['1C1C', '1C1C', '1C1O', '1C1O', '1C1O', '1C1O']\n", "\n", " pattern 2 : [[4, 5, 2], [4, 6, 3], [5, 2, 0], [6, 3, 1], [6, 4, 5]]\n", " treelet 2 : ['2C1O1C', 
def main():
    """Plot two graphs of the Acyclic dataset and run the treelet kernel on them.

    Loads the dataset, prints the 'label' node attribute of graphs 15 and 57,
    draws each graph, then calls ``treeletkernel`` on the pair with
    ``labeled=True``.

    NOTE(review): ``treeletkernel`` is not defined in the visible part of this
    cell - presumably it is defined further down or imported by an earlier
    cell; confirm it is in scope before calling ``main``.
    """
    dataset, y = loadDataset("../../../../datasets/acyclic/Acyclic/dataset_bps.ds")
    G1 = dataset[15]
    print(nx.get_node_attributes(G1, 'label'))
    nx.draw_networkx(G1)
    plt.show()
    G2 = dataset[57] # 180 double 4, 57, 3, double 3
    print(nx.get_node_attributes(G2, 'label'))
    nx.draw_networkx(G2)
    plt.show()

    treeletkernel(G1, G2, labeled = True)
    # Kmatrix = weisfeilerlehmankernel(G1, G2)


def find_paths(G, source_node, length):
    """Return all simple paths with exactly `length` edges that start at `source_node`.

    Parameters
    ----------
    G : graph-like
        Any object where ``G[node]`` iterates over the neighbours of ``node``
        (a networkx graph, or a plain dict of adjacency lists).
    source_node : hashable
        Node every returned path starts from.
    length : int
        Number of edges in each path.

    Returns
    -------
    list of list
        Each path is a list of ``length + 1`` nodes beginning with
        ``source_node``. ``length == 0`` yields ``[[source_node]]``; a negative
        ``length`` yields ``[]`` (no such path exists) instead of recursing
        without bound.
    """
    if length < 0:
        return []
    if length == 0:
        return [[source_node]]
    # Sub-paths are simple by construction, so rejecting the ones that come
    # back through `source_node` keeps the extended path simple as well.
    return [
        [source_node] + sub_path
        for neighbor in G[source_node]
        for sub_path in find_paths(G, neighbor, length - 1)
        if source_node not in sub_path
    ]


def find_all_paths(G, length):
    """Return every simple path of `length` edges in `G`, one orientation per path.

    Paths are collected from every node with ``find_paths``, so for
    ``length >= 1`` each path is found twice (once from each end). Of the two
    orientations, the one generated later in iteration order is kept; the
    relative order of the kept paths is the order in which they were generated.

    Parameters
    ----------
    G : graph-like
        Iterable over its nodes, with ``G[node]`` iterating over neighbours.
    length : int
        Number of edges in each path.

    Returns
    -------
    list of list
        The de-duplicated paths, each a list of ``length + 1`` nodes.
    """
    all_paths = []
    for node in G:
        all_paths.extend(find_paths(G, node, length))

    # Walk backwards so that `later_paths` always holds exactly the paths that
    # come after the current one; a path is dropped only when its reverse is
    # among them. One pass with set look-ups replaces the pairwise list scan.
    later_paths = set()
    kept = []
    for path in reversed(all_paths):
        if tuple(reversed(path)) not in later_paths:
            kept.append(path)
        later_paths.add(tuple(path))
    kept.reverse()
    return kept
pattern_t[i]\n", " for neighborx in G[pattern[i]]:\n", " if neighborx != pattern[0]:\n", " new_pattern = pattern_t + [ neighborx ]\n", "# new_patterns = [ pattern + [neighbor] for neighbor in G[pattern[i]] if neighbor != pattern[0] ]\n", " patterns['7'].append(new_pattern)\n", " canonkey['7'] = len(patterns['7'])\n", " \n", " # pattern 11\n", " patterns['11'] = []\n", " for pattern in patterns['4star']:\n", " for i in range(1, len(pattern)):\n", " if G.degree(pattern[i]) >= 2:\n", " pattern_t = pattern[:]\n", " pattern_t[i], pattern_t[4] = pattern_t[4], pattern_t[i]\n", " for neighborx in G[pattern[i]]:\n", " if neighborx != pattern[0]:\n", " new_pattern = pattern_t + [ neighborx ]\n", "# new_patterns = [ pattern + [neighborx] for neighborx in G[pattern[i]] if neighborx != pattern[0] ]\n", " patterns['11'].append(new_pattern)\n", " canonkey['b'] = len(patterns['11'])\n", " \n", " # pattern 12\n", " patterns['12'] = []\n", " rootlist = []\n", " for pattern in patterns['3star']:\n", "# print(pattern)\n", " if pattern[0] not in rootlist:\n", " rootlist.append(pattern[0])\n", " for i in range(1, len(pattern)):\n", " if G.degree(pattern[i]) >= 3:\n", " rootlist.append(pattern[i])\n", " pattern_t = pattern[:]\n", " pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]\n", " for neighborx1 in G[pattern[i]]:\n", " if neighborx1 != pattern[0]:\n", " for neighborx2 in G[pattern[i]]:\n", " if neighborx1 > neighborx2 and neighborx2 != pattern[0]:\n", " new_pattern = pattern_t + [neighborx1] + [neighborx2]\n", "# new_patterns = [ pattern + [neighborx1] + [neighborx2] for neighborx1 in G[pattern[i]] if neighborx1 != pattern[0] for neighborx2 in G[pattern[i]] if (neighborx1 > neighborx2 and neighborx2 != pattern[0]) ]\n", " patterns['12'].append(new_pattern)\n", " canonkey['c'] = int(len(patterns['12']) / 2)\n", " \n", " # pattern 9\n", " patterns['9'] = []\n", " for pattern in patterns['3star']:\n", "# print('pattern: ', pattern)\n", " for pairs in [ [neighbor1, neighbor2] for 
neighbor1 in G[pattern[0]] if G.degree(neighbor1) >= 2 \\\n", " for neighbor2 in G[pattern[0]] if G.degree(neighbor2) >= 2 if neighbor1 > neighbor2 ]:\n", "# print('pairs: ', pairs)\n", " pattern_t = pattern[:]\n", "# print('pattern_t: ', pattern_t)\n", " pattern_t[pattern_t.index(pairs[0])], pattern_t[2] = pattern_t[2], pattern_t[pattern_t.index(pairs[0])]\n", "# print('pattern_t: ', pattern_t)\n", " pattern_t[pattern_t.index(pairs[1])], pattern_t[3] = pattern_t[3], pattern_t[pattern_t.index(pairs[1])]\n", "# print('pattern_t: ', pattern_t)\n", " for neighborx1 in G[pairs[0]]:\n", " if neighborx1 != pattern[0]:\n", " for neighborx2 in G[pairs[1]]:\n", " if neighborx2 != pattern[0]:\n", " new_pattern = pattern_t + [neighborx1] + [neighborx2]\n", "# new_patterns = [ pattern + [neighborx1] + [neighborx2] for neighborx1 in G[pairs[0]] if neighborx1 != pattern[0] for neighborx2 in G[pairs[1]] if neighborx2 != pattern[0] ]\n", " patterns['9'].append(new_pattern)\n", " canonkey['9'] = len(patterns['9'])\n", " \n", " # pattern 10\n", " patterns['10'] = []\n", " for pattern in patterns['3star']: \n", " for i in range(1, len(pattern)):\n", " if G.degree(pattern[i]) >= 2:\n", " for neighborx in G[pattern[i]]:\n", " if neighborx != pattern[0] and G.degree(neighborx) >= 2:\n", " pattern_t = pattern[:]\n", " pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]\n", " new_patterns = [ pattern_t + [neighborx] + [neighborxx] for neighborxx in G[neighborx] if neighborxx != pattern[i] ]\n", " patterns['10'].extend(new_patterns)\n", " canonkey['a'] = len(patterns['10'])\n", " \n", " ### labeling information ###\n", " if labeled == True:\n", " canonkey_l = {}\n", " \n", " # linear patterns\n", " canonkey_t = Counter(list(nx.get_node_attributes(G, node_label).values()))\n", " for key in canonkey_t:\n", " canonkey_l['0' + key] = canonkey_t[key]\n", " print('\\n pattern 0: ', patterns['0'])\n", " print(' treelet 0: ', list(nx.get_node_attributes(G, node_label).values()))\n", " \n", " 
for i in range(1, 6):\n", " treelet = []\n", " for pattern in patterns[str(i)]:\n", " canonlist = list(chain.from_iterable((G.node[node][node_label], \\\n", " G[node][pattern[idx+1]][edge_label]) for idx, node in enumerate(pattern[:-1])))\n", " canonlist.append(G.node[pattern[-1]][node_label])\n", " canonkey_t = ''.join(canonlist)\n", " canonkey_t = canonkey_t if canonkey_t < canonkey_t[::-1] else canonkey_t[::-1]\n", " treelet.append(str(i) + canonkey_t)\n", " canonkey_l.update(Counter(treelet))\n", " print('\\n pattern', i, ': ', patterns[str(i)])\n", " print(' treelet', i, ': ', treelet)\n", " \n", "# print(canonkey_l)\n", " \n", " # n-star patterns\n", " for i in range(3, 6):\n", " treelet = []\n", " for pattern in patterns[str(i) + 'star']:\n", " canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:] ]\n", " canonlist.sort()\n", " canonkey_t = ('d' if i == 5 else str(i * 2)) + G.node[pattern[0]][node_label] + ''.join(canonlist)\n", " treelet.append(canonkey_t)\n", " canonkey_l.update(Counter(treelet))\n", " print('\\n pattern', i, 'star: ', patterns[str(i) + 'star'])\n", " print(' treelet', i, 'star: ', treelet)\n", " \n", " # pattern 7\n", " treelet = []\n", " for pattern in patterns['7']:\n", " canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ]\n", " canonlist.sort()\n", " canonkey_t = '7' + G.node[pattern[0]][node_label] + ''.join(canonlist) \\\n", " + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \\\n", " + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label]\n", " treelet.append(canonkey_t)\n", " canonkey_l.update(Counter(treelet))\n", " print('\\n pattern 7: ', patterns['7'])\n", " print(' treelet 7: ', treelet)\n", " \n", " # pattern 11\n", " treelet = []\n", " for pattern in patterns['11']:\n", " canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:4] ]\n", " canonlist.sort()\n", 
" canonkey_t = 'b' + G.node[pattern[0]][node_label] + ''.join(canonlist) \\\n", " + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[0]][edge_label] \\\n", " + G.node[pattern[5]][node_label] + G[pattern[5]][pattern[4]][edge_label]\n", " treelet.append(canonkey_t)\n", " canonkey_l.update(Counter(treelet))\n", " print('\\n pattern 11: ', patterns['11'])\n", " print(' treelet 11: ', treelet)\n", "\n", " # pattern 10\n", " treelet = []\n", " for pattern in patterns['10']:\n", " canonkey4 = G.node[pattern[5]][node_label] + G[pattern[5]][pattern[4]][edge_label]\n", " canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ]\n", " canonlist.sort()\n", " canonkey0 = ''.join(canonlist)\n", " canonkey_t = 'a' + G.node[pattern[3]][node_label] \\\n", " + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label] \\\n", " + G.node[pattern[0]][node_label] + G[pattern[0]][pattern[3]][edge_label] \\\n", " + canonkey4 + canonkey0\n", "# canonkey_t = 'a' + G.node[pattern[0]][node_label] + ''.join(canonlist) \\\n", "# + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \\\n", "# + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label]\n", " treelet.append(canonkey_t)\n", " canonkey_l.update(Counter(treelet))\n", " print('\\n pattern 10: ', patterns['10'])\n", " print(' treelet 10: ', treelet)\n", " \n", " # pattern 12\n", " treelet = []\n", " for pattern in patterns['12']:\n", " canonlist0 = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ]\n", " canonlist0.sort()\n", " canonlist3 = [ G.node[leaf][node_label] + G[leaf][pattern[3]][edge_label] for leaf in pattern[4:6] ]\n", " canonlist3.sort()\n", " canonkey_t1 = 'c' + G.node[pattern[0]][node_label] \\\n", " + ''.join(canonlist0) \\\n", " + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \\\n", " + ''.join(canonlist3)\n", " \n", " canonkey_t2 = 'c' + 
def _treelet_kernel_from_keys(canonkey1, canonkey2):
    """Compare two canonical-key count dictionaries.

    Only keys present in both dictionaries contribute. For each shared key
    the term exp(-(c1 - c2)**2 / 2) is added, where c1 and c2 are the counts
    stored under that key in the two dictionaries.

    Parameters
    ----------
    canonkey1, canonkey2 : dict
        Mappings from canonical key to occurrence count, as returned by
        get_canonkey.

    Returns
    -------
    numpy.float64
        Sum of the per-key terms; 0.0 when no key is shared.
    """
    # A set is iterated in the same order both times as long as it is not
    # modified, so the two count vectors stay aligned key by key.
    shared_keys = canonkey1.keys() & canonkey2.keys()
    counts1 = np.array([ canonkey1[key] for key in shared_keys ])
    counts2 = np.array([ canonkey2[key] for key in shared_keys ])
    return np.sum(np.exp(- np.square(counts1 - counts2) / 2))


def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True):
    """Compute the treelet kernel for a list of graphs or for a pair of graphs.

    Parameters
    ----------
    *args
        Either a single sequence of graphs (the full symmetric kernel matrix
        is computed), or two graphs (a single kernel value is computed).
        Any positional arguments beyond the second are ignored.
    node_label : str
        Node attribute name forwarded to get_canonkey.
    edge_label : str
        Edge attribute name forwarded to get_canonkey.
    labeled : bool
        Forwarded to get_canonkey; selects labeled or unlabeled canonical keys.

    Returns
    -------
    (Kmatrix, run_time) : (numpy.ndarray, float)
        When called with one argument: the symmetric len(Gn) x len(Gn) kernel
        matrix and the wall-clock seconds spent building it.
    kernel : numpy.float64
        When called with two graphs: their kernel value.

    Raises
    ------
    IndexError
        If called without any positional argument.
    """
    label_options = dict(node_label = node_label, edge_label = edge_label, labeled = labeled)

    if len(args) == 1: # for a list of graphs
        Gn = args[0]
        n_graphs = len(Gn)
        Kmatrix = np.zeros((n_graphs, n_graphs))

        start_time = time.time()

        # The canonical keys of a graph do not depend on its partner in a
        # pair, so compute them once per graph rather than once per pair.
        canonkeys = [ get_canonkey(G, **label_options) for G in Gn ]

        # Fill the upper triangle and mirror it: the kernel is symmetric.
        for i in range(n_graphs):
            for j in range(i, n_graphs):
                Kmatrix[i, j] = _treelet_kernel_from_keys(canonkeys[i], canonkeys[j])
                Kmatrix[j, i] = Kmatrix[i, j]

        run_time = time.time() - start_time
        print("\n --- treelet kernel matrix of size %d built in %s seconds ---" % (n_graphs, run_time))

        return Kmatrix, run_time

    # for only 2 graphs
    G1, G2 = args[0], args[1]
    canonkey1 = get_canonkey(G1, **label_options)
    canonkey2 = get_canonkey(G2, **label_options)
    return _treelet_kernel_from_keys(canonkey1, canonkey2)