diff --git a/.ipynb_checkpoints/run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb b/.ipynb_checkpoints/run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb new file mode 100644 index 0000000..6371cc3 --- /dev/null +++ b/.ipynb_checkpoints/run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb @@ -0,0 +1,2015 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a regression problem ---\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 0.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.38979601860046387 seconds ---\n", + "[[ 5. 6. 4. ..., 20. 20. 20.]\n", + " [ 6. 8. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 5. ..., 21. 21. 21.]\n", + " ..., \n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 17.681582\n", + "With standard deviation: 0.713183\n", + "\n", + " Mean performance on test set: 15.685879\n", + "With standard deviation: 4.139197\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 1.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.8205692768096924 seconds ---\n", + "[[ 10. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 16. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 10. ..., 22. 22. 24.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 24. ..., 122. 122. 
154.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 6.270014\n", + "With standard deviation: 0.654734\n", + "\n", + " Mean performance on test set: 7.550458\n", + "With standard deviation: 2.331786\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 2.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.375309705734253 seconds ---\n", + "[[ 15. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 24. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 15. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 159. 151. 124.]\n", + " [ 20. 20. 22. ..., 151. 153. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 185.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 4.450682\n", + "With standard deviation: 0.882129\n", + "\n", + " Mean performance on test set: 9.728466\n", + "With standard deviation: 2.057669\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 3.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.8636789321899414 seconds ---\n", + "[[ 20. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 32. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 20. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 188. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 168. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
202.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 2.270586\n", + "With standard deviation: 0.481516\n", + "\n", + " Mean performance on test set: 11.296110\n", + "With standard deviation: 2.799944\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 4.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.5077457427978516 seconds ---\n", + "[[ 25. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 40. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 25. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 217. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 183. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 213.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 1.074035\n", + "With standard deviation: 0.637823\n", + "\n", + " Mean performance on test set: 12.808303\n", + "With standard deviation: 3.446939\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 5.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.8235607147216797 seconds ---\n", + "[[ 30. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 48. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 30. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 246. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 198. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
224.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.700602\n", + "With standard deviation: 0.572640\n", + "\n", + " Mean performance on test set: 14.017923\n", + "With standard deviation: 3.675042\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 6.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.458494186401367 seconds ---\n", + "[[ 35. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 56. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 35. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 275. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 213. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 235.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691515\n", + "With standard deviation: 0.564620\n", + "\n", + " Mean performance on test set: 14.918434\n", + "With standard deviation: 3.805352\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 7.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.861224889755249 seconds ---\n", + "[[ 40. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 64. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 40. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 304. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 228. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
246.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691516\n", + "With standard deviation: 0.564620\n", + "\n", + " Mean performance on test set: 15.629476\n", + "With standard deviation: 3.865387\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 8.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.295838117599487 seconds ---\n", + "[[ 45. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 72. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 45. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 333. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 243. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 257.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691515\n", + "With standard deviation: 0.564620\n", + "\n", + " Mean performance on test set: 16.214369\n", + "With standard deviation: 3.928756\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 9.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.008287668228149 seconds ---\n", + "[[ 50. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 80. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 50. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 362. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 258. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
268.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691515\n", + "With standard deviation: 0.564620\n", + "\n", + " Mean performance on test set: 16.725744\n", + "With standard deviation: 3.993095\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 10.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.347799301147461 seconds ---\n", + "[[ 55. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 88. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 55. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 391. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 273. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 279.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691516\n", + "With standard deviation: 0.564621\n", + "\n", + " Mean performance on test set: 17.186401\n", + "With standard deviation: 4.056724\n", + "\n", + "\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 15.6859 4.1392 17.6816 0.713183 0.389796\n", + " 1 7.55046 2.33179 6.27001 0.654734 0.820569\n", + " 2 9.72847 2.05767 4.45068 0.882129 1.37531\n", + " 3 11.2961 2.79994 2.27059 0.481516 1.86368\n", + " 4 12.8083 3.44694 1.07403 0.637823 2.50775\n", + " 5 14.0179 3.67504 0.700602 0.57264 2.82356\n", + " 6 14.9184 3.80535 0.691515 0.56462 3.45849\n", + " 7 15.6295 3.86539 0.691516 0.56462 3.86122\n", + " 8 16.2144 3.92876 0.691515 0.56462 4.29584\n", + " 9 16.7257 3.9931 0.691515 0.56462 5.00829\n", + " 10 17.1864 4.05672 0.691516 0.564621 5.3478\n" + ] + } + ], + "source": [ + "# wl subtree kernel\n", + "%load_ext line_profiler\n", + "\n", + "import numpy as np\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import 
kernel_train_test\n", + "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type')\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "\n", + "# %lprun -f _wl_subtreekernel_do \\\n", + "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "ImportError", + "evalue": "cannot import name 'NUMPY_MKL'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minsert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"../\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpygraph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mkernel_train_test\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[1;32mfrom\u001b[0m 
\u001b[0mpygraph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkernels\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mweisfeilerLehmanKernel\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_wl_subtreekernel_do\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mE:\\课程及课件\\Doctorant\\py-graph\\pygraph\\utils\\utils.py\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 183\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 184\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mrandom\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 185\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkernel_ridge\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mKernelRidge\u001b[0m \u001b[1;31m# 0.17\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 186\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmean_squared_error\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 187\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msvm\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python\\python36\\lib\\site-packages\\sklearn\\__init__.py\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 132\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 133\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0m__check_build\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 134\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mbase\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mclone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 135\u001b[0m \u001b[0m__check_build\u001b[0m 
\u001b[1;31m# avoid flakes unused variable error\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python\\python36\\lib\\site-packages\\sklearn\\base.py\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 11\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mscipy\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msparse\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 12\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mexternals\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msix\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfixes\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msignature\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python\\python36\\lib\\site-packages\\scipy\\__init__.py\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[0m__all__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'test'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 61\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_distributor_init\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mNUMPY_MKL\u001b[0m \u001b[1;31m# requires numpy+mkl\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 62\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 63\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mshow_config\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mshow_numpy_config\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + 
"\u001b[1;31mImportError\u001b[0m: cannot import name 'NUMPY_MKL'" + ] + } + ], + "source": [ + "# WL sp kernel\n", + "%load_ext line_profiler\n", + "\n", + "import numpy as np\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'sp')\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "\n", + "# %lprun -f _wl_subtreekernel_do \\\n", + "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# results\n", + "\n", + "# with y normalization\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 36.2108 7.33179 38.6059 1.57064 0.379475\n", + " 1 9.00098 6.37145 6.76379 1.96568 0.844898\n", + " 2 19.8113 4.04911 5.28757 1.81899 1.35308\n", + " 3 25.0455 4.94276 2.3274 0.805733 1.81136\n", + " 4 28.2255 6.5212 0.85156 0.423465 2.23098\n", + " 5 30.6354 6.73647 3.35947 8.17561 2.71575\n", + " 6 32.1027 6.85601 3.54105 8.71922 3.11459\n", + " 7 32.9709 6.89606 6.94372 9.94045 3.55571\n", + " 8 33.5112 6.90753 6.97339 9.76975 3.79657\n", + " 9 33.8502 6.91427 11.8345 11.6213 4.41555\n", + " 10 34.0963 6.93115 11.4257 11.2624 4.94888\n", + "\n", + "# without y normalization\n", + " height RMSE_test std_test 
RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 15.6859 4.1392 17.6816 0.713183 0.360443\n", + " 1 7.55046 2.33179 6.27001 0.654734 0.837389\n", + " 2 9.72847 2.05767 4.45068 0.882129 1.25317\n", + " 3 11.2961 2.79994 2.27059 0.481516 1.79971\n", + " 4 12.8083 3.44694 1.07403 0.637823 2.35346\n", + " 5 14.0179 3.67504 0.700602 0.57264 2.78285\n", + " 6 14.9184 3.80535 0.691515 0.56462 3.20764\n", + " 7 15.6295 3.86539 0.691516 0.56462 3.71648\n", + " 8 16.2144 3.92876 0.691515 0.56462 3.99213\n", + " 9 16.7257 3.9931 0.691515 0.56462 4.26315\n", + " 10 17.1864 4.05672 0.691516 0.564621 5.00918" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "- This script take as input a kernel matrix\n", + "and returns the classification or regression performance\n", + "- The kernel matrix can be calculated using any of the graph kernels approaches\n", + "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", + "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", + "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", + "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", + "correspond to the average of the performances on the test sets. 
\n", + "\n", + "@references\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + "\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3920705318450928 seconds ---\n", + "[[ 5. 6. 4. ..., 20. 20. 20.]\n", + " [ 6. 8. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 5. ..., 21. 21. 21.]\n", + " ..., \n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]\n", + " [ 20. 20. 21. ..., 101. 101. 101.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 17.681582\n", + "With standard deviation: 0.713183\n", + "\n", + " Mean performance on test set: 15.685879\n", + "With standard deviation: 4.139197\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 1 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.8578901290893555 seconds ---\n", + "[[ 10. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 16. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 10. ..., 22. 22. 24.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 22. ..., 130. 130. 122.]\n", + " [ 20. 20. 24. ..., 122. 122. 
154.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 6.270014\n", + "With standard deviation: 0.654734\n", + "\n", + " Mean performance on test set: 7.550458\n", + "With standard deviation: 2.331786\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 2 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.264050006866455 seconds ---\n", + "[[ 15. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 24. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 15. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 159. 151. 124.]\n", + " [ 20. 20. 22. ..., 151. 153. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 185.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 4.450682\n", + "With standard deviation: 0.882129\n", + "\n", + " Mean performance on test set: 9.728466\n", + "With standard deviation: 2.057669\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 3 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.731236219406128 seconds ---\n", + "[[ 20. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 32. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 20. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 188. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 168. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
202.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 2.270586\n", + "With standard deviation: 0.481516\n", + "\n", + " Mean performance on test set: 11.296110\n", + "With standard deviation: 2.799944\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 4 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.1112847328186035 seconds ---\n", + "[[ 25. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 40. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 25. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 217. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 183. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 213.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 1.074035\n", + "With standard deviation: 0.637823\n", + "\n", + " Mean performance on test set: 12.808303\n", + "With standard deviation: 3.446939\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 5 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.4751319885253906 seconds ---\n", + "[[ 30. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 48. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 30. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 246. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 198. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
224.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.700602\n", + "With standard deviation: 0.572640\n", + "\n", + " Mean performance on test set: 14.017923\n", + "With standard deviation: 3.675042\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 6 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.8712213039398193 seconds ---\n", + "[[ 35. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 56. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 35. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 275. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 213. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 235.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691515\n", + "With standard deviation: 0.564620\n", + "\n", + " Mean performance on test set: 14.918434\n", + "With standard deviation: 3.805352\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 7 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.554422378540039 seconds ---\n", + "[[ 40. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 64. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 40. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 304. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 228. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
246.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691516\n", + "With standard deviation: 0.564620\n", + "\n", + " Mean performance on test set: 15.629476\n", + "With standard deviation: 3.865387\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 8 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.8757314682006836 seconds ---\n", + "[[ 45. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 72. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 45. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 333. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 243. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 257.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691515\n", + "With standard deviation: 0.564620\n", + "\n", + " Mean performance on test set: 16.214369\n", + "With standard deviation: 3.928756\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 9 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.205373764038086 seconds ---\n", + "[[ 50. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 80. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 50. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 362. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 258. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 
268.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691515\n", + "With standard deviation: 0.564620\n", + "\n", + " Mean performance on test set: 16.725744\n", + "With standard deviation: 3.993095\n", + "\n", + "\n", + " #--- calculating kernel matrix when subtree height = 10 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.737298250198364 seconds ---\n", + "[[ 55. 10. 4. ..., 20. 20. 20.]\n", + " [ 10. 88. 4. ..., 20. 20. 20.]\n", + " [ 4. 4. 55. ..., 22. 22. 26.]\n", + " ..., \n", + " [ 20. 20. 22. ..., 391. 159. 124.]\n", + " [ 20. 20. 22. ..., 159. 273. 124.]\n", + " [ 20. 20. 26. ..., 124. 124. 279.]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 0.691516\n", + "With standard deviation: 0.564621\n", + "\n", + " Mean performance on test set: 17.186401\n", + "With standard deviation: 4.056724\n", + "\n", + "\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 15.6859 4.1392 17.6816 0.713183 0.392071\n", + " 1 7.55046 2.33179 6.27001 0.654734 0.85789\n", + " 2 9.72847 2.05767 4.45068 0.882129 1.26405\n", + " 3 11.2961 2.79994 2.27059 0.481516 1.73124\n", + " 4 12.8083 3.44694 1.07403 0.637823 2.11128\n", + " 5 14.0179 3.67504 0.700602 0.57264 2.47513\n", + " 6 14.9184 3.80535 0.691515 0.56462 2.87122\n", + " 7 15.6295 3.86539 0.691516 0.56462 3.55442\n", + " 8 16.2144 3.92876 0.691515 0.56462 3.87573\n", + " 9 16.7257 3.9931 0.691515 0.56462 4.20537\n", + " 10 17.1864 4.05672 0.691516 0.564621 4.7373\n" + ] + } + ], + "source": [ + "# test of WL subtree kernel\n", + "\n", + "\"\"\"\n", + "- This 
script take as input a kernel matrix\n", + "and returns the classification or regression performance\n", + "- The kernel matrix can be calculated using any of the graph kernels approaches\n", + "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", + "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", + "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", + "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", + "correspond to the average of the performances on the test sets. \n", + "\n", + "@references\n", + " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + "\"\"\"\n", + "\n", + "print(__doc__)\n", + "\n", + "import sys\n", + "import os\n", + "import pathlib\n", + "from collections import OrderedDict\n", + "sys.path.insert(0, \"../\")\n", + "from tabulate import tabulate\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "from pygraph.utils.utils import split_train_test\n", + "\n", + "train_means_list = []\n", + "train_stds_list = []\n", + "test_means_list = []\n", + "test_stds_list = []\n", + "kernel_time_list = []\n", + "\n", + "for height in np.linspace(0, 10, 11):\n", + " print('\\n\\n #--- calculating kernel matrix when subtree height = %d ---#' % height)\n", + "\n", + " print('\\n Loading dataset from file...')\n", + " dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", + " y = np.array(y)\n", + "# print(y)\n", + "\n", + " # setup the parameters\n", + " model_type = 'regression' # Regression or classification 
problem\n", + " print('\\n --- This is a %s problem ---' % model_type)\n", + "\n", + "# datasize = len(dataset)\n", + " trials = 100 # Trials for hyperparameters random search\n", + " splits = 10 # Number of splits of the data\n", + " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", + " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", + "\n", + "\n", + " # set the output path\n", + " kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", + " if not os.path.exists(kernel_file_path):\n", + " os.makedirs(kernel_file_path)\n", + "\n", + " \"\"\"\n", + " - Here starts the main program\n", + " - First we permute the data, then for each split we evaluate corresponding performances\n", + " - In the end, the performances are averaged over the test sets\n", + " \"\"\"\n", + "\n", + " # save kernel matrices to files / read kernel matrices from files\n", + " kernel_file = kernel_file_path + 'km.ds'\n", + " path = pathlib.Path(kernel_file)\n", + " # get train set kernel matrix\n", + " if path.is_file():\n", + " print('\\n Loading the kernel matrix from file...')\n", + " Kmatrix = np.loadtxt(kernel_file)# results\n", + " print(Kmatrix)\n", + " else:\n", + " print('\\n Calculating kernel matrix, this could take a while...')\n", + " Kmatrix, run_time = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height))\n", + " kernel_time_list.append(run_time)\n", + " print(Kmatrix)\n", + " print('\\n Saving kernel matrix to file...')\n", + " # np.savetxt(kernel_file, Kmatrix)\n", + "\n", + " train_mean, train_std, test_mean, test_std = \\\n", + " split_train_test(Kmatrix, y, alpha_grid, C_grid, splits, trials, model_type, normalize = False)\n", + " \n", + " train_means_list.append(train_mean)\n", + " train_stds_list.append(train_std)\n", + " test_means_list.append(test_mean)\n", + " test_stds_list.append(test_std)\n", + " \n", + "print('\\n') \n", + 
"table_dict = {'height': np.linspace(0, 10, 11), 'RMSE_test': test_means_list, 'std_test': test_stds_list, \\\n", + " 'RMSE_train': train_means_list, 'std_train': train_stds_list, 'k_time': kernel_time_list}\n", + "keyorder = ['height', 'RMSE_test', 'std_test', 'RMSE_train', 'std_train', 'k_time']\n", + "print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys'))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'O', 'C'}\n", + "{'O', 'C'}\n", + "--- shortest path kernel built in 0.0002582073211669922 seconds ---\n", + "3\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'O'})]\n", + " -> \n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CO'}), (3, {'label': 'CCCO'}), (4, {'label': 'OCC'})]\n", + " -> \n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '3'}), (3, {'label': '1'}), (4, {'label': '2'})]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'C'}), (5, {'label': 'C'}), (6, 
{'label': 'O'})]\n", + " -> \n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CC'}), (3, {'label': 'CO'}), (4, {'label': 'CCCC'}), (5, {'label': 'CCCO'}), (6, {'label': 'OCC'})]\n", + " -> \n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '0'}), (3, {'label': '3'}), (4, {'label': '4'}), (5, {'label': '1'}), (6, {'label': '2'})]\n", + "--- shortest path kernel built in 0.00026607513427734375 seconds ---\n", + "6\n" + ] + } + ], + "source": [ + "import sys\n", + "import networkx as nx\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "from pygraph.kernels.spkernel import spkernel\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "def weisfeilerlehman_test(G):\n", + " '''\n", + " Weisfeiler-Lehman test of graph isomorphism.\n", + " '''\n", + "\n", + " nx.draw_networkx(G)\n", + " plt.show()\n", + " nx.draw_networkx_labels(G, nx.spring_layout(G), labels = nx.get_node_attributes(G,'label'))\n", + " print(G.nodes(data = True))\n", + " \n", + " set_multisets = []\n", + " for node in G.nodes(data = True):\n", + " # Multiset-label determination.\n", + " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n", + " # sorting each multiset\n", + " multiset.sort()\n", + " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n", + " set_multisets.append(multiset)\n", + " \n", + " # label compression\n", + "# set_multisets.sort() # this is unnecessary\n", + " set_unique = list(set(set_multisets)) # set of unique multiset labels\n", + " 
set_compressed = { value : str(set_unique.index(value)) for value in set_unique } # assign indices as the new labels\n", + "# print(set_compressed)\n", + "# print(set_multisets)\n", + " \n", + " # relabel nodes with multisets\n", + " for node in G.nodes(data = True):\n", + " node[1]['label'] = set_multisets[node[0]]\n", + " print(' -> ')\n", + " nx.draw_networkx(G)\n", + " plt.show()\n", + " print(G.nodes(data = True))\n", + "\n", + " \n", + " # relabel nodes\n", + " for node in G.nodes(data = True):\n", + " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n", + " \n", + " print(' -> ')\n", + " nx.draw_networkx(G)\n", + " plt.show()\n", + " print(G.nodes(data = True))\n", + "\n", + "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", + "G1 = dataset[12]\n", + "G2 = dataset[55]\n", + "\n", + "# init.\n", + "kernel = 0 # init kernel\n", + "num_nodes1 = G1.number_of_nodes()\n", + "num_nodes2 = G2.number_of_nodes()\n", + "\n", + "# the first iteration.\n", + "labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", + "labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", + "print(labelset1)\n", + "print(labelset2)\n", + "kernel += spkernel(G1, G2)\n", + "print(kernel)\n", + "\n", + "\n", + "\n", + "for height in range(0, min(num_nodes1, num_nodes2)): #Q how to determine the upper bound of the height?\n", + " if labelset1 != labelset2:\n", + " break\n", + " \n", + " # Weisfeiler-Lehman test of graph isomorphism.\n", + " weisfeilerlehman_test(G1)\n", + " weisfeilerlehman_test(G2)\n", + " \n", + " # calculate kernel\n", + " kernel += spkernel(G1, G2)\n", + " \n", + " # get label sets of both graphs\n", + " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", + " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", + "# print(labelset1)\n", + "# print(labelset2)\n", + "\n", + "print(kernel)" + ] + }, + { + "cell_type": 
"code", + "execution_count": 20, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}\n", + "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'C', 6: 'S', 7: 'S'}\n", + "\n", + " --- height = 0 --- \n", + "\n", + " --- for graph 0 --- \n", + "\n", + "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", + "all_labels_ori: {'C', 'O'}\n", + "num_of_each_label: {'C': 5, 'O': 2}\n", + "all_num_of_each_label: [{'C': 5, 'O': 2}]\n", + "num_of_labels: 2\n", + "all_labels_ori: {'C', 'O'}\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n", + "all_labels_ori: {'C', 'O', 'S'}\n", + "num_of_each_label: {'C': 6, 'S': 2}\n", + "all_num_of_each_label: [{'C': 5, 'O': 2}, {'C': 6, 'S': 2}]\n", + "num_of_labels: 2\n", + "all_labels_ori: {'C', 'O', 'S'}\n", + "\n", + " all_num_of_labels_occured: 3\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'C', 'O'}\n", + "vector1: [[5 2]]\n", + "vector2: [[5 2]]\n", + "Kmatrix: [[ 29. 0.]\n", + " [ 0. 0.]]\n", + "\n", + " labels: {'C', 'O', 'S'}\n", + "vector1: [[5 2 0]]\n", + "vector2: [[6 0 2]]\n", + "Kmatrix: [[ 29. 30.]\n", + " [ 30. 0.]]\n", + "\n", + " labels: {'C', 'S'}\n", + "vector1: [[6 2]]\n", + "vector2: [[6 2]]\n", + "Kmatrix: [[ 29. 30.]\n", + " [ 30. 
40.]]\n", + "\n", + " --- height = 1 --- \n", + "\n", + " --- for graph 0 --- \n", + "\n", + "multiset: ['CC', 'CC', 'CCO', 'CCO', 'COO', 'OCC', 'OCC']\n", + "set_unique: ['OCC', 'COO', 'CCO', 'CC']\n", + "set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "all_set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", + "num_of_labels_occured: 7\n", + "\n", + " compressed labels: {0: '7', 1: '7', 2: '6', 3: '6', 4: '5', 5: '4', 6: '4'}\n", + "labels_comp: ['7', '7', '6', '6', '5', '4', '4']\n", + "all_labels_ori: {'5', '4', '6', '7'}\n", + "num_of_each_label: {'5': 1, '4': 2, '6': 2, '7': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}]\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "multiset: ['CC', 'CC', 'CC', 'CCS', 'CCS', 'CCSS', 'SCC', 'SCC']\n", + "set_unique: ['SCC', 'CC', 'CCS', 'CCSS']\n", + "set_compressed: {'SCC': '8', 'CC': '7', 'CCS': '9', 'CCSS': '10'}\n", + "all_set_compressed: {'SCC': '8', 'COO': '5', 'CCS': '9', 'OCC': '4', 'CCO': '6', 'CCSS': '10', 'CC': '7'}\n", + "num_of_labels_occured: 10\n", + "\n", + " compressed labels: {0: '7', 1: '7', 2: '7', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n", + "labels_comp: ['7', '7', '7', '9', '9', '10', '8', '8']\n", + "all_labels_ori: {'10', '4', '7', '9', '6', '5', '8'}\n", + "num_of_each_label: {'10': 1, '9': 2, '7': 3, '8': 2}\n", + "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}, {'10': 1, '9': 2, '7': 3, '8': 2}]\n", + "\n", + " all_num_of_labels_occured: 10\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'5', '4', '6', '7'}\n", + "vector1: [[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", + "\n", + " labels: {'10', '4', '7', '9', '6', '5', '8'}\n", + "vector1: [[0 2 2 0 2 1 0]]\n", + "vector2: [[1 0 3 2 0 0 2]]\n", + "\n", + " labels: {'8', '10', '7', '9'}\n", + "vector1: [[2 1 3 2]]\n", + "vector2: [[2 1 3 2]]\n", + "\n", + " Kmatrix: [[ 42. 36.]\n", + " [ 36. 
58.]]\n", + "\n", + " --- height = 2 --- \n", + "\n", + " --- for graph 0 --- \n", + "\n", + "multiset: ['76', '76', '647', '647', '544', '456', '456']\n", + "set_unique: ['647', '76', '456', '544']\n", + "set_compressed: {'647': '11', '76': '12', '544': '14', '456': '13'}\n", + "all_set_compressed: {'647': '11', '76': '12', '456': '13', '544': '14'}\n", + "num_of_labels_occured: 14\n", + "\n", + " compressed labels: {0: '12', 1: '12', 2: '11', 3: '11', 4: '14', 5: '13', 6: '13'}\n", + "labels_comp: ['12', '12', '11', '11', '14', '13', '13']\n", + "all_labels_ori: {'14', '12', '11', '13'}\n", + "num_of_each_label: {'14': 1, '13': 2, '12': 2, '11': 2}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}]\n", + "\n", + " --- for graph 1 --- \n", + "\n", + "multiset: ['79', '79', '710', '978', '978', '10788', '8109', '8109']\n", + "set_unique: ['710', '8109', '79', '10788', '978']\n", + "set_compressed: {'710': '15', '79': '17', '8109': '16', '978': '19', '10788': '18'}\n", + "all_set_compressed: {'710': '15', '79': '17', '978': '19', '10788': '18', '8109': '16', '456': '13', '544': '14', '647': '11', '76': '12'}\n", + "num_of_labels_occured: 19\n", + "\n", + " compressed labels: {0: '17', 1: '17', 2: '15', 3: '19', 4: '19', 5: '18', 6: '16', 7: '16'}\n", + "labels_comp: ['17', '17', '15', '19', '19', '18', '16', '16']\n", + "all_labels_ori: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "num_of_each_label: {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}\n", + "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}, {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}]\n", + "\n", + " all_num_of_labels_occured: 19\n", + "\n", + " --- calculating kernel matrix ---\n", + "\n", + " labels: {'14', '12', '11', '13'}\n", + "vector1: [[1 2 2 2]]\n", + "vector2: [[1 2 2 2]]\n", + "\n", + " labels: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", + "vector1: [[0 0 2 2 0 2 1 0 0]]\n", + "vector2: [[1 2 0 0 2 0 0 2 1]]\n", + "\n", + " labels: 
{'18', '17', '15', '16', '19'}\n", + "vector1: [[1 2 1 2 2]]\n", + "vector2: [[1 2 1 2 2]]\n", + "\n", + " Kmatrix: [[ 55. 36.]\n", + " [ 36. 72.]]\n", + "\n", + " --- Weisfeiler-Lehman subtree kernel built in 0.0034377574920654297 seconds ---\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 55., 36.],\n", + " [ 36., 72.]])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# test of WL subtree kernel on many graphs\n", + "\n", + "import sys\n", + "import pathlib\n", + "from collections import Counter\n", + "sys.path.insert(0, \"../\")\n", + "\n", + "import networkx as nx\n", + "import numpy as np\n", + "import time\n", + "\n", + "from pygraph.kernels.spkernel import spkernel\n", + "from pygraph.kernels.pathKernel import pathkernel\n", + "\n", + "def weisfeilerlehmankernel(*args, height = 0, base_kernel = 'subtree'):\n", + " \"\"\"Calculate Weisfeiler-Lehman kernels between graphs.\n", + " \n", + " Parameters\n", + " ----------\n", + " Gn : List of NetworkX graph\n", + " List of graphs between which the kernels are calculated.\n", + " /\n", + " G1, G2 : NetworkX graphs\n", + " 2 graphs between which the kernel is calculated.\n", + " \n", + " height : subtree height\n", + " \n", + " base_kernel : base kernel used in each iteration of WL kernel\n", + " the default base kernel is subtree kernel\n", + " \n", + " Return\n", + " ------\n", + " Kmatrix/Kernel : Numpy matrix/int\n", + " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. / Weisfeiler-Lehman Kernel between 2 graphs.\n", + " \n", + " Notes\n", + " -----\n", + " This function now supports WL subtree kernel and WL shortest path kernel.\n", + " \n", + " References\n", + " ----------\n", + " [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 
2011;12(Sep):2539-61.\n", + " \"\"\"\n", + " if len(args) == 1: # for a list of graphs\n", + "\n", + "# print(args)\n", + " start_time = time.time()\n", + " \n", + " # for WL subtree kernel\n", + " if base_kernel == 'subtree': \n", + " Kmatrix = _wl_subtreekernel_do(args[0], height = height, base_kernel = 'subtree')\n", + " \n", + " # for WL edge kernel\n", + " elif base_kernel == 'edge':\n", + " print('edge')\n", + " \n", + " # for WL shortest path kernel\n", + " elif base_kernel == 'sp':\n", + " Gn = args[0]\n", + " Kmatrix = np.zeros((len(Gn), len(Gn)))\n", + " \n", + " for i in range(0, len(Gn)):\n", + " for j in range(i, len(Gn)):\n", + " Kmatrix[i][j] = _weisfeilerlehmankernel_do(Gn[i], Gn[j])\n", + " Kmatrix[j][i] = Kmatrix[i][j]\n", + "\n", + " print(\"\\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---\" % (base_kernel, len(args[0]), (time.time() - start_time)))\n", + " \n", + " return Kmatrix\n", + " \n", + " else: # for only 2 graphs\n", + " \n", + " start_time = time.time()\n", + " \n", + " # for WL subtree kernel\n", + " if base_kernel == 'subtree':\n", + " \n", + " args = [args[0], args[1]]\n", + "# print(args)\n", + " kernel = _wl_subtreekernel_do(args, height = height, base_kernel = 'subtree')\n", + " \n", + " # for WL edge kernel\n", + " elif base_kernel == 'edge':\n", + " print('edge')\n", + " \n", + " # for WL shortest path kernel\n", + " elif base_kernel == 'sp':\n", + " \n", + "\n", + " kernel = _pathkernel_do(args[0], args[1])\n", + "\n", + " print(\"\\n --- Weisfeiler-Lehman %s kernel built in %s seconds ---\" % (base_kernel, time.time() - start_time))\n", + " \n", + " return kernel\n", + " \n", + " \n", + "def _weisfeilerlehmankernel_do(G1, G2):\n", + " \"\"\"Calculate Weisfeiler-Lehman kernels between 2 graphs. 
This kernel use shortest path kernel to calculate kernel between two graphs in each iteration.\n", + " \n", + " Parameters\n", + " ----------\n", + " G1, G2 : NetworkX graphs\n", + " 2 graphs between which the kernel is calculated.\n", + " \n", + " Return\n", + " ------\n", + " Kernel : int\n", + " Weisfeiler-Lehman Kernel between 2 graphs.\n", + " \"\"\"\n", + " \n", + " # init.\n", + " kernel = 0 # init kernel\n", + " num_nodes1 = G1.number_of_nodes()\n", + " num_nodes2 = G2.number_of_nodes()\n", + " height = 12 #min(num_nodes1, num_nodes2)) #Q how to determine the upper bound of the height?\n", + " \n", + " # the first iteration.\n", + " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", + " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", + " kernel += pathkernel(G1, G2) # change your base kernel here (and one more below)\n", + " \n", + " for h in range(0, height):\n", + "# if labelset1 != labelset2:\n", + "# break\n", + "\n", + " # Weisfeiler-Lehman test of graph isomorphism.\n", + " relabel(G1)\n", + " relabel(G2)\n", + "\n", + " # calculate kernel\n", + " kernel += pathkernel(G1, G2) # change your base kernel here (and one more before)\n", + "\n", + " # get label sets of both graphs\n", + " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", + " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", + " \n", + " return kernel\n", + "\n", + "\n", + "def relabel(G):\n", + " '''\n", + " Relabel nodes in graph G in one iteration of the 1-dim. 
WL test of graph isomorphism.\n", + " \n", + " Parameters\n", + " ----------\n", + " G : NetworkX graph\n", + " The graphs whose nodes are relabeled.\n", + " '''\n", + " \n", + " # get the set of original labels\n", + " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", + " print(labels_ori)\n", + " num_of_each_label = dict(Counter(labels_ori))\n", + " print(num_of_each_label)\n", + " num_of_labels = len(num_of_each_label)\n", + " print(num_of_labels)\n", + " \n", + " set_multisets = []\n", + " for node in G.nodes(data = True):\n", + " # Multiset-label determination.\n", + " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n", + " # sorting each multiset\n", + " multiset.sort()\n", + " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n", + " set_multisets.append(multiset)\n", + " print(set_multisets)\n", + " \n", + " # label compression\n", + "# set_multisets.sort() # this is unnecessary\n", + " set_unique = list(set(set_multisets)) # set of unique multiset labels\n", + " print(set_unique)\n", + " set_compressed = { value : str(set_unique.index(value) + num_of_labels + 1) for value in set_unique } # assign new labels\n", + " print(set_compressed)\n", + " \n", + " # relabel nodes\n", + "# nx.relabel_nodes(G, set_compressed, copy = False)\n", + " for node in G.nodes(data = True):\n", + " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n", + " print(nx.get_node_attributes(G, 'label'))\n", + "\n", + " # get the set of compressed labels\n", + " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n", + " print(labels_comp)\n", + " num_of_each_label.update(dict(Counter(labels_comp)))\n", + " print(num_of_each_label)\n", + " \n", + " \n", + "def _wl_subtreekernel_do(*args, height = 0, base_kernel = 'subtree'):\n", + " \"\"\"Calculate Weisfeiler-Lehman subtree kernels between graphs.\n", + " \n", + " Parameters\n", + " ----------\n", + " Gn : List of NetworkX graph\n", + 
" List of graphs between which the kernels are calculated.\n", + " \n", + " Return\n", + " ------\n", + " Kmatrix/Kernel : Numpy matrix/int\n", + " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.\n", + " \"\"\"\n", + " \n", + "# print(args)\n", + " Gn = args[0]\n", + "# print(Gn)\n", + "\n", + " Kmatrix = np.zeros((len(Gn), len(Gn)))\n", + " all_num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs\n", + " \n", + " # initial for height = 0\n", + " print('\\n --- height = 0 --- ')\n", + " all_labels_ori = set() # all unique orignal labels in all graphs in this iteration\n", + " all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration\n", + " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n", + " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n", + "\n", + " # for each graph\n", + " for idx, G in enumerate(Gn):\n", + " # get the set of original labels\n", + " print('\\n --- for graph %d --- \\n' % (idx))\n", + " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", + " print('labels_ori: %s' % (labels_ori))\n", + " all_labels_ori.update(labels_ori)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", + " num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", + " print('num_of_each_label: %s' % (num_of_each_label))\n", + " all_num_of_each_label.append(num_of_each_label)\n", + " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n", + " num_of_labels = len(num_of_each_label) # number of all unique labels\n", + " print('num_of_labels: %s' % (num_of_labels))\n", + " \n", + "\n", + " all_labels_ori.update(labels_ori)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", + " \n", + " all_num_of_labels_occured 
+= len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", + " \n", + " # calculate subtree kernel with the 0th iteration and add it to the final kernel\n", + " print('\\n --- calculating kernel matrix ---')\n", + " for i in range(0, len(Gn)):\n", + " for j in range(i, len(Gn)):\n", + " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", + " print('\\n labels: %s' % (labels))\n", + " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", + " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", + " print('vector1: %s' % (vector1))\n", + " print('vector2: %s' % (vector2))\n", + " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", + " Kmatrix[j][i] = Kmatrix[i][j]\n", + " print('Kmatrix: %s' % (Kmatrix))\n", + "\n", + " \n", + " # iterate each height\n", + " for h in range(1, height + 1):\n", + " print('\\n --- height = %d --- ' % (h))\n", + " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n", + " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n", + " all_labels_ori = set()\n", + " all_num_of_each_label = []\n", + " \n", + " # for each graph\n", + " for idx, G in enumerate(Gn):\n", + "# # get the set of original labels\n", + " print('\\n --- for graph %d --- \\n' % (idx))\n", + "# labels_ori = list(nx.get_node_attributes(G, 'label').values())\n", + "# print('labels_ori: %s' % (labels_ori))\n", + "# num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n", + "# print('num_of_each_label: %s' % (num_of_each_label))\n", + "# num_of_labels = len(num_of_each_label) # number of all unique labels\n", + "# print('num_of_labels: 
%s' % (num_of_labels))\n", + " \n", + "# all_labels_ori.update(labels_ori)\n", + "# print('all_labels_ori: %s' % (all_labels_ori))\n", + "# # num_of_labels_occured += num_of_labels #@todo not precise\n", + "# num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n", + "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", + " \n", + " set_multisets = []\n", + " for node in G.nodes(data = True):\n", + " # Multiset-label determination.\n", + " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n", + " # sorting each multiset\n", + " multiset.sort()\n", + " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n", + " set_multisets.append(multiset)\n", + " print('multiset: %s' % (set_multisets))\n", + "\n", + " # label compression\n", + " # set_multisets.sort() # this is unnecessary\n", + " set_unique = list(set(set_multisets)) # set of unique multiset labels\n", + " print('set_unique: %s' % (set_unique))\n", + " # a dictionary mapping original labels to new ones. 
\n", + " set_compressed = {}\n", + " # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label \n", + " for value in set_unique:\n", + " if value in all_set_compressed.keys():\n", + " set_compressed.update({ value : all_set_compressed[value] })\n", + " else:\n", + " set_compressed.update({ value : str(num_of_labels_occured + 1) })\n", + " num_of_labels_occured += 1\n", + "# set_compressed = { value : (all_set_compressed[value] if value in all_set_compressed.keys() else str(set_unique.index(value) + num_of_labels_occured + 1)) for value in set_unique }\n", + " print('set_compressed: %s' % (set_compressed))\n", + " \n", + " all_set_compressed.update(set_compressed)\n", + " print('all_set_compressed: %s' % (all_set_compressed))\n", + "# num_of_labels_occured += len(set_compressed) #@todo not precise\n", + " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n", + " \n", + " # relabel nodes\n", + " # nx.relabel_nodes(G, set_compressed, copy = False)\n", + " for node in G.nodes(data = True):\n", + " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n", + " print('\\n compressed labels: %s' % (nx.get_node_attributes(G, 'label')))\n", + "\n", + " # get the set of compressed labels\n", + " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n", + " print('labels_comp: %s' % (labels_comp))\n", + " all_labels_ori.update(labels_comp)\n", + " print('all_labels_ori: %s' % (all_labels_ori))\n", + " num_of_each_label = dict(Counter(labels_comp))\n", + " print('num_of_each_label: %s' % (num_of_each_label))\n", + " all_num_of_each_label.append(num_of_each_label)\n", + " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n", + " \n", + " all_num_of_labels_occured += len(all_labels_ori)\n", + " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n", + " \n", + " # calculate subtree kernel with h iterations and add it to the final kernel\n", + " 
print('\\n --- calculating kernel matrix ---')\n", + " for i in range(0, len(Gn)):\n", + " for j in range(i, len(Gn)):\n", + " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n", + " print('\\n labels: %s' % (labels))\n", + " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n", + " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n", + " print('vector1: %s' % (vector1))\n", + " print('vector2: %s' % (vector2))\n", + " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n", + " Kmatrix[j][i] = Kmatrix[i][j]\n", + " \n", + " print('\\n Kmatrix: %s' % (Kmatrix))\n", + "\n", + " return Kmatrix\n", + "\n", + " \n", + "# main\n", + "import sys\n", + "from collections import Counter\n", + "import networkx as nx\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "from pygraph.kernels.spkernel import spkernel\n", + "\n", + "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", + "G1 = dataset[15]\n", + "print(nx.get_node_attributes(G1, 'label'))\n", + "G2 = dataset[80]\n", + "print(nx.get_node_attributes(G2, 'label'))\n", + "\n", + "weisfeilerlehmankernel(G1, G2, height = 2)\n", + "# Kmatrix = weisfeilerlehmankernel(G1, G2)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "185" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "len(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "- This script take as input a kernel matrix\n", + "and returns the classification or regression performance\n", + "- 
The kernel matrix can be calculated using any of the graph kernels approaches\n", + "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", + "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", + "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", + "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", + "correspond to the average of the performances on the test sets. \n", + "\n", + "@references\n", + " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + "\n", + "\n", + " --- calculating kernel matrix when subtree height = 0 ---\n", + "\n", + " Loading dataset from file...\n", + "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", + " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", + " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", + " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", + " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", + " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", + " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", + " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", + " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", + " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", + " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", + " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", + " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", + " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", + " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", + " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", + " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", + " 173. 187. 174. 188.5 199. 228. 215. 216. 
240. ]\n", + "\n", + " --- This is a regression problem ---\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'sp'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, *args)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_weisfeilerlehmankernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36m_weisfeilerlehmankernel_do\u001b[0;34m(G1, G2, height)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# calculate 
kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# change your base kernel here (and one more before)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;31m# get label sets of both graphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spkernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me2\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# Author: Elisabetta Ghisu\n", + "# test of WL subtree kernel\n", + "\n", + "\"\"\"\n", + "- This script take as input a kernel matrix\n", + "and returns the classification or regression performance\n", + "- The kernel matrix can be calculated using any of the graph kernels approaches\n", + "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", + "- For predition we divide the data in training, validation and test. 
For each split, we first train on the train data, \n", + "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", + "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", + "correspond to the average of the performances on the test sets. \n", + "\n", + "@references\n", + " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", + "\"\"\"\n", + "\n", + "print(__doc__)\n", + "\n", + "import sys\n", + "import os\n", + "import pathlib\n", + "sys.path.insert(0, \"../\")\n", + "from tabulate import tabulate\n", + "\n", + "import random\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from sklearn.kernel_ridge import KernelRidge # 0.17\n", + "from sklearn.metrics import accuracy_score, mean_squared_error\n", + "from sklearn import svm\n", + "\n", + "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "\n", + "val_means_height = []\n", + "val_stds_height = []\n", + "test_means_height = []\n", + "test_stds_height = []\n", + "\n", + "\n", + "for height in np.linspace(0, 10, 11):\n", + " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n", + "\n", + " print('\\n Loading dataset from file...')\n", + " dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", + " y = np.array(y)\n", + " print(y)\n", + "\n", + " # setup the parameters\n", + " model_type = 'regression' # Regression or classification problem\n", + " print('\\n --- This is a %s problem ---' % model_type)\n", + "\n", + " datasize = len(dataset)\n", + " trials = 100 # Trials for hyperparameters random search\n", + " splits = 10 # Number of splits of the data\n", + " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear 
models such as LogisticRegression\n", + " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", + " random.seed(20) # Set the seed for uniform parameter distribution\n", + "\n", + " # set the output path\n", + " kernel_file_path = 'kernelmatrices_weisfeilerlehman_acyclic/'\n", + " if not os.path.exists(kernel_file_path):\n", + " os.makedirs(kernel_file_path)\n", + "\n", + "\n", + " \"\"\"\n", + " - Here starts the main program\n", + " - First we permute the data, then for each split we evaluate corresponding performances\n", + " - In the end, the performances are averaged over the test sets\n", + " \"\"\"\n", + "\n", + " # save kernel matrices to files / read kernel matrices from files\n", + " kernel_file = kernel_file_path + 'km.ds'\n", + " path = pathlib.Path(kernel_file)\n", + " # get train set kernel matrix\n", + " if path.is_file():\n", + " print('\\n Loading the kernel matrix from file...')\n", + " Kmatrix = np.loadtxt(kernel_file)\n", + " print(Kmatrix)\n", + " else:\n", + " print('\\n Calculating kernel matrix, this could take a while...')\n", + " Kmatrix = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height), base_kernel = 'sp')\n", + " print(Kmatrix)\n", + " print('\\n Saving kernel matrix to file...')\n", + "# np.savetxt(kernel_file, Kmatrix)\n", + "\n", + " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", + " val_split = []\n", + " test_split = []\n", + "\n", + " # For each split of the data\n", + " for j in range(10, 10 + splits):\n", + " # print('\\n Starting split %d...' 
% j)\n", + "\n", + " # Set the random set for data permutation\n", + " random_state = int(j)\n", + " np.random.seed(random_state)\n", + " idx_perm = np.random.permutation(datasize)\n", + " # print(idx_perm)\n", + "\n", + " # Permute the data\n", + " y_perm = y[idx_perm] # targets permutation\n", + " # print(y_perm)\n", + " Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", + " # print(Kmatrix_perm)\n", + " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", + "\n", + " # Set the training, validation and test\n", + " # Note: the percentage can be set up by the user\n", + " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", + " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", + " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", + " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", + "\n", + " # Split the kernel matrix\n", + " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", + " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", + " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", + "\n", + " # Split the targets\n", + " y_train = y_perm[0:num_train]\n", + "\n", + " # Normalization step (for real valued targets only)\n", + " if model_type == 'regression':\n", + " # print('\\n Normalizing output y...')\n", + " y_train_mean = np.mean(y_train)\n", + " y_train_std = np.std(y_train)\n", + " y_train = (y_train - y_train_mean) / float(y_train_std)\n", + " # print(y)\n", + "\n", + " y_val = y_perm[num_train:(num_train + num_val)]\n", + " y_test = y_perm[(num_train + num_val):datasize]\n", + "\n", + " # Record the performance for each parameter trial respectively on validation and test set\n", + " perf_all_train = []\n", + " perf_all_test = []\n", + "\n", + " # For each parameter trial\n", + " for i in range(trials):\n", + " # For regression use the 
Kernel Ridge method\n", + " if model_type == 'regression':\n", + " # print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", + "\n", + " # Fit the kernel ridge model\n", + " KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", + " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", + " KR.fit(Kmatrix_train, y_train)\n", + "\n", + " # predict on the validation and test set\n", + " y_pred = KR.predict(Kmatrix_val)\n", + " y_pred_test = KR.predict(Kmatrix_test)\n", + " # print(y_pred)\n", + "\n", + " # adjust prediction: needed because the training targets have been normalizaed\n", + " y_pred = y_pred * float(y_train_std) + y_train_mean\n", + " # print(y_pred)\n", + " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", + " # print(y_pred_test)\n", + "\n", + " # root mean squared error on validation\n", + " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", + " perf_all_val.append(rmse)\n", + "\n", + " # root mean squared error in test \n", + " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", + " perf_all_test.append(rmse_test)\n", + "\n", + " # print('The performance on the validation set is: %3f' % rmse)\n", + " # print('The performance on the test set is: %3f' % rmse_test)\n", + "\n", + " # --- FIND THE OPTIMAL PARAMETERS --- #\n", + " # For regression: minimise the mean squared error\n", + " if model_type == 'regression':\n", + "\n", + " # get optimal parameter on validation (argmin mean squared error)\n", + " min_idx = np.argmin(perf_all_test)\n", + " alpha_opt = alpha_grid[min_idx]\n", + "\n", + " # performance corresponding to optimal parameter on val\n", + " perf_val_opt = perf_all_val[min_idx]\n", + "\n", + " # corresponding performance on test for the same parameter\n", + " perf_test_opt = perf_all_test[min_idx]\n", + "\n", + " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", + " # print('The best performance 
on the validation set is: %3f' % perf_val_opt)\n", + " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", + "\n", + " # append the best performance on validation\n", + " # at the current split\n", + " val_split.append(perf_val_opt)\n", + "\n", + " # append the corresponding performance on the test set\n", + " test_split.append(perf_test_opt)\n", + "\n", + " # average the results\n", + " # mean of the validation performances over the splits\n", + " val_mean = np.mean(np.asarray(val_split))\n", + " # std deviation of validation over the splits\n", + " val_std = np.std(np.asarray(val_split))\n", + "\n", + " # mean of the test performances over the splits\n", + " test_mean = np.mean(np.asarray(test_split))\n", + " # std deviation of the test over the splits\n", + " test_std = np.std(np.asarray(test_split))\n", + "\n", + " print('\\n Mean performance on val set: %3f' % val_mean)\n", + " print('With standard deviation: %3f' % val_std)\n", + " print('\\n Mean performance on test set: %3f' % test_mean)\n", + " print('With standard deviation: %3f' % test_std)\n", + " \n", + " val_means_height.append(val_mean)\n", + " val_stds_height.append(val_std)\n", + " test_means_height.append(test_mean)\n", + " test_stds_height.append(test_std)\n", + " \n", + "print('\\n') \n", + "print(tabulate({'height': np.linspace(1, 12, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# a = [0, 1, 3, 2]\n", + "# b = [3, 2, 1, 0]\n", + "# print(1 if a == b else 0)\n", + "\n", + "# max(1 ,2)\n", + "\n", + "# x = [ 'r', 'a', 's' ]\n", + "# x.sort()\n", + "# print(x)\n", + "\n", + "# def test1(*args, base = 'subtree'):\n", + "# if base == 
'subtree':\n", + "# print('subtree')\n", + "# elif base == 'edge':\n", + "# print('edge')\n", + "# else:\n", + "# print('sp')\n", + "\n", + "# # function parameter usage test\n", + "# test1('hello', 'hi', base = 'edge')\n", + "\n", + "# # python matrix calculation speed test\n", + "# import numpy as np\n", + "# import time\n", + "\n", + "# size = 100\n", + "# m1 = np.random.random((size, size))\n", + "# m2 = np.random.random((size, size))\n", + "# itr = 1\n", + "\n", + "# start_time = time.time()\n", + "# for i in range(itr):\n", + "# np.dot(m1, m2)\n", + "# print(time.time() - start_time)\n", + "\n", + "# start_time = time.time()\n", + "# for j in range(itr):\n", + "# result = np.zeros((size, size))\n", + "# for i1 in range(size):\n", + "# for i2 in range(size):\n", + "# for i3 in range(size):\n", + "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n", + "# print(time.time() - start_time)\n", + "\n", + "# start_time = time.time()\n", + "# for i in range(itr):\n", + "# print(np.dot(m1, m2))\n", + "# print(time.time() - start_time)\n", + "\n", + "# start_time = time.time()\n", + "# for j in range(itr):\n", + "# result = np.zeros((size, size))\n", + "# for i1 in range(size):\n", + "# for i2 in range(size):\n", + "# for i3 in range(size):\n", + "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n", + "# print(result)\n", + "# print(time.time() - start_time)\n", + "\n", + "# help(np.sum)\n", + "\n", + "# test dict\n", + "import sys\n", + "from collections import Counter\n", + "import networkx as nx\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "from pygraph.kernels.spkernel import spkernel\n", + "\n", + "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", + "G1 = dataset[15]\n", + "nx.get_node_attributes(G1, 'label')\n", + "listhqhq = list(nx.get_node_attributes(G1, 'label').values())\n", + "dicthaha = dict(Counter(listhqhq))\n", + "len(dicthaha)" + ] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/README.md b/README.md index b43f901..102353c 100644 --- a/README.md +++ b/README.md @@ -11,26 +11,30 @@ A python package for graph kernels. * tabulate - 0.8.2 ## Results with minimal test RMSE for each kernel on dataset Asyclic -All kernels are tested on dataset Asyclic, which consists of 185 molecules (graphs). + +All kernels except for Cyclic pattern kernel are tested on dataset Asyclic, which consists of 185 molecules (graphs). (Cyclic pattern kernel is tested on dataset MAO and PAH.) The criteria used for prediction are SVM for classification and kernel Ridge regression for regression. For predition we randomly divide the data in train and test subset, where 90% of entire dataset is for training and rest for testing. 10 splits are performed. For each split, we first train on the train data, then evaluate the performance on the test set. We choose the optimal parameters for the test set and finally provide the corresponding performance. The final results correspond to the average of the performances on the test sets. 
-| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time | -|---------------|:-------:|:------:|-------------:|-------:| -| Shortest path | 35.19 | 4.50 | - | 14.58" | -| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" | -| Path | 14.00 | 6.93 | - | 36.21" | -| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" | -| Treelet | 8.31 | 3.38 | - | 0.50" | -| Path up to d | 7.43 | 2.69 | depth = 2 | 0.59" | - +| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time | +|------------------|:-------:|:------:|------------------:|-------:| +| Shortest path | 35.19 | 4.50 | - | 14.58" | +| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" | +| Path | 18.41 | 10.78 | - | 29.43" | +| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" | +| WL shortest path | 35.16 | 4.50 | height = 2 | 40.24" | +| WL edge | 33.41 | 4.73 | height = 5 | 5.66" | +| Treelet | 8.31 | 3.38 | - | 0.50" | +| Path up to d | 7.43 | 2.69 | depth = 2 | 0.59" | +| Tree pattern | 7.27 | 2.21 | lamda = 1, h = 2 | 37.24" | +| Cyclic pattern | 0.9 | 0.11 | cycle bound = 100 | 0.31" | * RMSE stands for arithmetic mean of the root mean squared errors on all splits. * STD stands for standard deviation of the root mean squared errors on all splits. * Paremeter is the one with which the kenrel achieves the best results. * k_time is the time spent on building the kernel matrix. -* The targets of training data are normalized before calculating *path kernel* and *treelet kernel*. +* The targets of training data are normalized before calculating *treelet kernel*. * See detail results in [results.md](pygraph/kernels/results.md). ## References @@ -44,6 +48,12 @@ For predition we randomly divide the data in train and test subset, where 90% of [5] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47. +[6] Liva Ralaivola, Sanjay J Swamidass, Hiroto Saigo, and Pierre Baldi. Graph kernels for chemical informatics. Neural networks, 18(8):1093–1110, 2005. 
+ +[7] Pierre Mahé and Jean-Philippe Vert. Graph kernels based on tree patterns for molecules. Machine learning, 75(1):3–35, 2009. + +[8] Tamás Horváth, Thomas Gärtner, and Stefan Wrobel. Cyclic pattern kernels for predictive graph mining. In Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining, pages 158–167. ACM, 2004. + ## Updates ### 2018.01.24 * ADD *path kernel up to depth d* and its result on dataset Asyclic. diff --git a/notebooks/.ipynb_checkpoints/run_cyclicpatternkernel-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_cyclicpatternkernel-checkpoint.ipynb new file mode 100644 index 0000000..8b5f587 --- /dev/null +++ b/notebooks/.ipynb_checkpoints/run_cyclicpatternkernel-checkpoint.ipynb @@ -0,0 +1,936 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 0.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 373.39it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 26367.08it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.18705153465270996 seconds ---\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 83%|████████▎ | 834/1000 [00:00<00:00, 2077.02it/s]\n", + " Mean performance on train set: 0.549180\n", + "With standard deviation: 0.016798\n", + "\n", + " Mean performance on test set: 0.642857\n", + "With standard deviation: 0.146385\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2083.52it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 50.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 231.33it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 15078.65it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3006291389465332 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 81%|████████ | 808/1000 [00:00<00:00, 2005.12it/s]\n", + " Mean performance on train set: 0.698361\n", + "With standard deviation: 0.116889\n", + "\n", + " Mean performance on test set: 0.871429\n", + "With standard deviation: 0.100000\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2024.59it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 100.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 224.68it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13144.65it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.30983662605285645 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 
8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 82%|████████▏ | 821/1000 [00:00<00:00, 2050.17it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2050.63it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 150.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 219.10it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12644.09it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31808018684387207 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 993/1000 [00:00<00:00, 1993.90it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1977.95it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 200.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 219.08it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14177.69it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31757450103759766 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 980/1000 [00:00<00:00, 1969.03it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1951.39it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 250.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 218.22it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12697.56it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3192298412322998 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1878.10it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1875.67it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 300.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 206.81it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12364.00it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.33614420890808105 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 970/1000 [00:00<00:00, 1947.13it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1934.26it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 350.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 189.65it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13989.93it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3654501438140869 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1875.81it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1881.94it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 400.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 220.95it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14281.34it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3142852783203125 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 952/1000 [00:00<00:00, 1900.77it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1900.46it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 450.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 212.09it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 11357.62it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3287320137023926 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 981/1000 [00:00<00:00, 1956.30it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1952.54it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 500.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.14it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12536.27it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3347315788269043 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 979/1000 [00:00<00:00, 1970.30it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1950.19it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 550.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.06it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13816.44it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3341798782348633 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 974/1000 [00:00<00:00, 1930.44it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1937.89it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 600.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 213.56it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13048.43it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.32569050788879395 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 972/1000 [00:00<00:00, 1924.82it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1935.68it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 650.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 216.51it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 9669.54it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3229689598083496 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 983/1000 [00:00<00:00, 1963.08it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1960.32it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 700.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.61it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13485.23it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.33377623558044434 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 81%|████████ | 812/1000 [00:00<00:00, 2020.52it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2029.28it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 750.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.54it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13952.29it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31093406677246094 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 82%|████████▎ | 825/1000 [00:00<00:00, 2053.32it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2055.77it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 800.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.35it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13220.82it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31124091148376465 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 96%|█████████▌| 959/1000 [00:00<00:00, 1925.40it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1912.78it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 850.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 202.00it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12487.42it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.34392237663269043 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1869.41it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1883.23it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 900.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 217.23it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13956.38it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.32010626792907715 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 989/1000 [00:00<00:00, 1978.29it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1968.44it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 950.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 228.56it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14794.72it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.30414795875549316 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 83%|████████▎ | 829/1000 [00:00<00:00, 2063.72it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2068.06it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 1000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.02it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13702.27it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3120880126953125 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 82%|████████▎ | 825/1000 [00:00<00:00, 2054.81it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2022.62it/s]\n", + "\n", + "\n", + " cycle_bound accur_test std_test accur_train std_train k_time\n", + "------------- ------------ ---------- ------------- ----------- --------\n", + " 0 0.642857 0.146385 0.54918 0.0167983 0.187052\n", + " 50 0.871429 0.1 0.698361 0.116889 0.300629\n", + " 100 0.9 0.111575 0.732787 0.0826366 0.309837\n", + " 150 0.9 0.111575 0.732787 0.0826366 0.31808\n", + " 200 0.9 0.111575 0.732787 0.0826366 0.317575\n", + " 250 0.9 0.111575 0.732787 0.0826366 0.31923\n", + " 300 0.9 0.111575 0.732787 0.0826366 0.336144\n", + " 350 0.9 0.111575 0.732787 0.0826366 0.36545\n", + " 400 0.9 0.111575 0.732787 0.0826366 0.314285\n", + " 450 0.9 0.111575 0.732787 0.0826366 0.328732\n", + " 500 0.9 0.111575 0.732787 0.0826366 0.334732\n", + " 550 0.9 0.111575 0.732787 0.0826366 0.33418\n", + " 600 0.9 0.111575 0.732787 0.0826366 0.325691\n", + " 650 0.9 0.111575 0.732787 0.0826366 0.322969\n", + " 700 0.9 0.111575 0.732787 0.0826366 0.333776\n", + " 750 0.9 0.111575 0.732787 0.0826366 0.310934\n", + " 800 0.9 0.111575 0.732787 0.0826366 0.311241\n", + " 850 0.9 0.111575 0.732787 0.0826366 0.343922\n", + " 900 0.9 0.111575 0.732787 0.0826366 0.320106\n", + " 950 0.9 0.111575 0.732787 0.0826366 0.304148\n", + " 1000 0.9 0.111575 0.732787 0.0826366 0.312088\n" + ] + } + ], + "source": [ + "# MAO dataset (node labeled, edge labeled, undirected, cyclic + linear, classification)\n", + "%load_ext 
line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/MAO/dataset.ds'\n", + "kernel_file_path = 'kernelmatrices_cyclicpattern_mao/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 500, 21), normalize = False,\n", + " model_type = 'classification')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# PAH dataset (node and edge unlabeled, undirected, cyclic, classification)\n", + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/PAH/dataset.ds'\n", + "kernel_file_path = 'kernelmatrices_cyclicpattern_pah/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 500, 21), normalize = False,\n", + " model_type = 'classification')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# results\n", + "\n", + "# MAO dataset\n", + "cycle_bound accur_test std_test accur_train std_train k_time\n", + "------------- ------------ ---------- ------------- ----------- --------\n", + " 0 0.642857 0.146385 0.54918 0.0167983 0.187052\n", + " 50 0.871429 0.1 0.698361 0.116889 
0.300629\n", + " 100 0.9 0.111575 0.732787 0.0826366 0.309837\n", + " 150 0.9 0.111575 0.732787 0.0826366 0.31808\n", + " 200 0.9 0.111575 0.732787 0.0826366 0.317575" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 1000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "load SDF: 100%|██████████| 4457424/4457424 [00:10<00:00, 408299.51it/s]\n", + "ajust data: 100%|██████████| 42687/42687 [00:10<00:00, 4092.17it/s] \n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 42682/42682 [19:36<00:00, 36.27it/s]\n", + "calculate kernels: 100%|██████████| 42682/42682 [37:05<00:00, 19.18it/s] \n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 42682 built in 3402.171978712082 seconds ---\n", + "[[ 9. 9. 3. ... 4. 3. 4.]\n", + " [ 9. 11. 5. ... 6. 5. 6.]\n", + " [ 3. 5. 16. ... 6. 6. 6.]\n", + " ...\n", + " [ 4. 6. 6. ... 30. 29. 6.]\n", + " [ 3. 5. 6. ... 29. 29. 6.]\n", + " [ 4. 6. 6. ... 6. 6. 
11.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 7%|▋ | 70/1000 [1:34:57<227:25:45, 880.37s/it]" + ] + } + ], + "source": [ + "# NCI-HIV dataset (labeled?, directed?, cyclic, classification)\n", + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf'\n", + "datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt'\n", + "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 1000, 21), normalize = False, \\\n", + " datafile_y = datafile_y, model_type = 'classification')\n", + "\n", + "# kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n", + "\n", + "# kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para,\n", + "# normalize = False, datafile_y = datafile_y, model_type = 'classification')\n", + "\n", + "# kernel_para['k_func'] = 'minmax'\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = True)\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "# # kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)\n", + "\n", + "# kernel_para['depth'] = 10\n", + "# %lprun -f untildpathkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, 
untildpathkernel, kernel_para, normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The line_profiler extension is already loaded. To reload it, use:\n", + " %reload_ext line_profiler\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- kernel matrix of cyclic pattern kernel of size 999 built in 18.78946042060852 seconds ---\n", + "(array([[11., 5., 5., ..., 6., 7., 3.],\n", + " [ 5., 16., 6., ..., 5., 5., 3.],\n", + " [ 5., 6., 8., ..., 4., 5., 3.],\n", + " ...,\n", + " [ 6., 5., 4., ..., 17., 7., 4.],\n", + " [ 7., 5., 5., ..., 7., 15., 4.],\n", + " [ 3., 3., 3., ..., 4., 4., 11.]]), 18.78946042060852)\n" + ] + } + ], + "source": [ + "%load_ext line_profiler\n", + "\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "# datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf'\n", + "# datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt'\n", + "# dataset, y = loadDataset(datafile, datafile_y)\n", + "G1 = dataset[1]\n", + "G2 = dataset[2]\n", + "G3 = dataset[3]\n", + "G4 = dataset[4]\n", + "G5 = dataset[5]\n", + "data = [G1, G2, G3, G4, G5]\n", + "nx.draw_networkx(G1)\n", + "plt.show()\n", + "nx.draw_networkx(G2)\n", + "plt.show()\n", + "\n", + "kernel = cyclicpatternkernel(dataset[1:1000], cycle_bound = 1000)\n", + "print(kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 185/185 [00:00<00:00, 2064.69it/s]\n", + "calculate kernels: 100%|██████████| 185/185 [00:00<00:00, 11170.00it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 185 built in 0.10836505889892578 seconds ---\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|██████████| 1000/1000 [00:24<00:00, 36.41it/s]\n", + " Mean performance on train set: 0.018072\n", + "With standard deviation: 0.000000\n", + "\n", + " Mean performance on test set: 0.000000\n", + "With standard deviation: 0.000000\n", + "\n", + "\n", + " accur_test std_test accur_train std_train k_time\n", + "------------ ---------- ------------- ----------- --------\n", + " 0 0 0.0180723 0 0.108365\n" + ] + } + ], + "source": [ + "# acyclic dataset (node labeled, edge labeled, undirected, linear + non-linear, regression)\n", + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n", + "\n", + "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, 
cyclicpatternkernel, kernel_para, \\\n", + " normalize = False , model_type = 'classification')\n", + "\n", + "# kernel_para['k_func'] = 'minmax'\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = True)\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "# # kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)\n", + "\n", + "# kernel_para['depth'] = 10\n", + "# %lprun -f untildpathkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb index 93f1626..9890952 100644 --- a/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_marginalizedkernel_acyclic-checkpoint.ipynb @@ -364,6 +364,155 @@ }, { "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a regression problem ---\n", + "\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- marginalized kernel matrix of size 185 built in 
1133.0229969024658 seconds ---\n", + "[[ 0.0287062 0.0124634 0.00444444 ..., 0.00606061 0.00606061\n", + " 0.00606061]\n", + " [ 0.0124634 0.01108958 0.00333333 ..., 0.00454545 0.00454545\n", + " 0.00454545]\n", + " [ 0.00444444 0.00333333 0.0287062 ..., 0.00819912 0.00819912\n", + " 0.00975875]\n", + " ..., \n", + " [ 0.00606061 0.00454545 0.00819912 ..., 0.02846735 0.02836907\n", + " 0.02896354]\n", + " [ 0.00606061 0.00454545 0.00819912 ..., 0.02836907 0.02831424\n", + " 0.0288712 ]\n", + " [ 0.00606061 0.00454545 0.00975875 ..., 0.02896354 0.0288712\n", + " 0.02987915]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 12.186285\n", + "With standard deviation: 7.038988\n", + "\n", + " Mean performance on test set: 18.024312\n", + "With standard deviation: 6.292466\n", + "\n", + "\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 18.0243 6.29247 12.1863 7.03899 1133.02\n" + ] + } + ], + "source": [ + "%load_ext line_profiler\n", + "\n", + "import numpy as np\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.marginalizedKernel import marginalizedkernel, _marginalizedkernel_do\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', itr = 20, p_quit = 0.1)\n", + "\n", + "# kernel_train_test(datafile, kernel_file_path, marginalizedkernel, kernel_para, \\\n", + "# hyper_name = 'p_quit', hyper_range = np.linspace(0.1, 0.9, 9), normalize = False)\n", + "\n", + "%lprun -f _marginalizedkernel_do \\\n", + " kernel_train_test(datafile, kernel_file_path, marginalizedkernel, kernel_para, \\\n", + " normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "Timer unit: 1e-06 s\n", + "\n", + "Total time: 828.879 s\n", + "File: ../pygraph/kernels/marginalizedKernel.py\n", + "Function: _marginalizedkernel_do at line 67\n", + "\n", + "Line # Hits Time Per Hit % Time Line Contents\n", + "==============================================================\n", + " 67 def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):\n", + " 68 \"\"\"Calculate marginalized graph kernel between 2 graphs.\n", + " 69 \n", + " 70 Parameters\n", + " 71 ----------\n", + " 72 G1, G2 : NetworkX graphs\n", + " 73 2 graphs between which the kernel is calculated.\n", + " 74 node_label : string\n", + " 75 node attribute used as label.\n", + " 76 edge_label : string\n", + " 77 edge attribute used as label.\n", + " 78 p_quit : integer\n", + " 79 the termination probability in the random walks generating step.\n", + " 80 itr : integer\n", + " 81 time of iterations to calculate R_inf.\n", + " 82 \n", + " 83 Return\n", + " 84 ------\n", + " 85 kernel : float\n", + " 86 Marginalized Kernel between 2 graphs.\n", + " 87 \"\"\"\n", + " 88 # init parameters\n", + " 89 17205 12886.0 0.7 0.0 kernel = 0\n", + " 90 17205 52542.0 3.1 0.0 num_nodes_G1 = nx.number_of_nodes(G1)\n", + " 91 17205 28240.0 1.6 0.0 num_nodes_G2 = nx.number_of_nodes(G2)\n", + " 92 17205 15595.0 0.9 0.0 p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)\n", + " 93 17205 11587.0 0.7 0.0 p_init_G2 = 1 / num_nodes_G2\n", + " 94 \n", + " 95 17205 11663.0 0.7 0.0 q = p_quit * p_quit\n", + " 96 17205 10728.0 0.6 0.0 r1 = q\n", + " 97 \n", + " 98 # initial R_inf\n", + " 99 17205 38412.0 2.2 0.0 R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes\n", + " 100 \n", + " 101 # calculate R_inf with a simple interative method\n", + " 102 344100 329235.0 1.0 0.0 for i in range(1, itr):\n", + " 103 326895 
900354.0 2.8 0.1 R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])\n", + " 104 326895 2287346.0 7.0 0.3 R_inf_new.fill(r1)\n", + " 105 \n", + " 106 # calculate R_inf for each pair of nodes\n", + " 107 2653464 3667117.0 1.4 0.4 for node1 in G1.nodes(data = True):\n", + " 108 2326569 7522840.0 3.2 0.9 neighbor_n1 = G1[node1[0]]\n", + " 109 2326569 3492118.0 1.5 0.4 p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)\n", + " 110 24024379 27775021.0 1.2 3.4 for node2 in G2.nodes(data = True):\n", + " 111 21697810 69471941.0 3.2 8.4 neighbor_n2 = G2[node2[0]]\n", + " 112 21697810 32446626.0 1.5 3.9 p_trans_n2 = (1 - p_quit) / len(neighbor_n2) \n", + " 113 \n", + " 114 59095092 52545370.0 0.9 6.3 for neighbor1 in neighbor_n1:\n", + " 115 104193150 92513935.0 0.9 11.2 for neighbor2 in neighbor_n2:\n", + " 116 \n", + " 117 t = p_trans_n1 * p_trans_n2 * \\\n", + " 118 66795868 285324518.0 4.3 34.4 deltakernel(G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label]) * \\\n", + " 119 66795868 137934393.0 2.1 16.6 deltakernel(neighbor_n1[neighbor1][edge_label] == neighbor_n2[neighbor2][edge_label])\n", + " 120 66795868 106834143.0 1.6 12.9 R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8)\n", + " 121 \n", + " 122 326895 1123677.0 3.4 0.1 R_inf[:] = R_inf_new\n", + " 123 \n", + " 124 # add elements of R_inf up and calculate kernel\n", + " 125 139656 330283.0 2.4 0.0 for node1 in G1.nodes(data = True):\n", + " 126 1264441 1435263.0 1.1 0.2 for node2 in G2.nodes(data = True): \n", + " 127 1141990 1377134.0 1.2 0.2 s = p_init_G1 * p_init_G2 * deltakernel(node1[1][node_label] == node2[1][node_label])\n", + " 128 1141990 1375456.0 1.2 0.2 kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)\n", + " 129 \n", + " 130 17205 10801.0 0.6 0.0 return kernel" + ] + }, + { + "cell_type": 
"code", "execution_count": 3, "metadata": { "scrolled": false diff --git a/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb index bdb4b16..12df241 100644 --- a/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_pathkernel_acyclic-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -15,13 +15,11 @@ " --- This is a regression problem ---\n", "\n", "\n", - "\n", - "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- mean average path kernel matrix of size 185 built in 132.2242877483368 seconds ---\n", + " --- mean average path kernel matrix of size 185 built in 29.430902242660522 seconds ---\n", "[[ 0.55555556 0.22222222 0. ..., 0. 0. 0. ]\n", " [ 0.22222222 0.27777778 0. ..., 0. 0. 0. ]\n", " [ 0. 0. 
0.55555556 ..., 0.03030303 0.03030303\n", @@ -36,16 +34,16 @@ "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on train set: 3.761907\n", - "With standard deviation: 0.702594\n", + " Mean performance on train set: 3.619948\n", + "With standard deviation: 0.512351\n", "\n", - " Mean performance on test set: 14.001515\n", - "With standard deviation: 6.936023\n", + " Mean performance on test set: 18.418852\n", + "With standard deviation: 10.781119\n", "\n", "\n", - " RMSE_test std_test RMSE_train std_train k_time\n", + " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 14.0015 6.93602 3.76191 0.702594 132.224\n" + " 18.4189 10.7811 3.61995 0.512351 29.4309\n" ] } ], @@ -62,10 +60,10 @@ "\n", "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type')\n", "\n", - "kernel_train_test(datafile, kernel_file_path, pathkernel, kernel_para, normalize = True)\n", + "kernel_train_test(datafile, kernel_file_path, pathkernel, kernel_para, normalize = False)\n", "\n", "# %lprun -f _pathkernel_do \\\n", - "# kernel_train_test(datafile, kernel_file_path, pathkernel, kernel_para, normalize = True)" + "# kernel_train_test(datafile, kernel_file_path, pathkernel, kernel_para, normalize = False)" ] }, { @@ -84,7 +82,7 @@ "# without y normalization\n", " RMSE_test std_test RMSE_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 18.4189 10.7811 3.61995 0.512351 37.0017" + " 18.4189 10.7811 3.61995 0.512351 29.4309" ] }, { diff --git a/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb index 8466693..3c7e9d4 100644 --- a/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_spkernel_acyclic-checkpoint.ipynb @@ -2,44 +2,42 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": 
{}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The line_profiler extension is already loaded. To reload it, use:\n", - " %reload_ext line_profiler\n", "\n", " --- This is a regression problem ---\n", "\n", "\n", - "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", - "--- shortest path kernel matrix of size 185 built in 14.576777696609497 seconds ---\n", - "[[ 3. 1. 3. ..., 1. 1. 1.]\n", - " [ 1. 6. 1. ..., 0. 0. 3.]\n", - " [ 3. 1. 3. ..., 1. 1. 1.]\n", - " ..., \n", - " [ 1. 0. 1. ..., 55. 21. 7.]\n", - " [ 1. 0. 1. ..., 21. 55. 7.]\n", - " [ 1. 3. 1. ..., 7. 7. 55.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + "--- shortest path kernel matrix of size 185 built in 13.3865065574646 seconds ---\n", + "[[ 3. 1. 3. ... 1. 1. 1.]\n", + " [ 1. 6. 1. ... 0. 0. 3.]\n", + " [ 3. 1. 3. ... 1. 1. 1.]\n", + " ...\n", + " [ 1. 0. 1. ... 55. 21. 7.]\n", + " [ 1. 0. 1. ... 21. 55. 7.]\n", + " [ 1. 3. 1. ... 7. 7. 
55.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 757.54it/s]\n", " Mean performance on train set: 28.360361\n", "With standard deviation: 1.357183\n", "\n", " Mean performance on test set: 35.191954\n", "With standard deviation: 4.495767\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 771.22it/s]\n", "\n", "\n", - " RMSE_test std_test RMSE_train std_train k_time\n", + " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 35.192 4.49577 28.3604 1.35718 14.5768\n" + " 35.192 4.49577 28.3604 1.35718 13.3865\n" ] } ], diff --git a/notebooks/.ipynb_checkpoints/run_treeletkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_treeletkernel_acyclic-checkpoint.ipynb index 41ee8d3..59daf4d 100644 --- a/notebooks/.ipynb_checkpoints/run_treeletkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_treeletkernel_acyclic-checkpoint.ipynb @@ -2,15 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The line_profiler extension is already loaded. 
To reload it, use:\n", - " %reload_ext line_profiler\n", "\n", " --- This is a regression problem ---\n", "\n", @@ -19,68 +17,34 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- treelet kernel matrix of size 185 built in 0.48417091369628906 seconds ---\n", - "[[ 4.00000000e+00 2.60653066e+00 1.00000000e+00 ..., 1.26641655e-14\n", - " 1.26641655e-14 1.26641655e-14]\n", - " [ 2.60653066e+00 6.00000000e+00 1.00000000e+00 ..., 1.26641655e-14\n", - " 1.26641655e-14 1.26641655e-14]\n", - " [ 1.00000000e+00 1.00000000e+00 4.00000000e+00 ..., 3.00000000e+00\n", - " 3.00000000e+00 3.00000000e+00]\n", - " ..., \n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 1.80000000e+01\n", - " 1.30548713e+01 8.19020657e+00]\n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 1.30548713e+01\n", - " 2.20000000e+01 9.71901120e+00]\n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 8.19020657e+00\n", - " 9.71901120e+00 1.60000000e+01]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- treelet kernel matrix of size 185 built in 0.47543811798095703 seconds ---\n", + "[[4.00000000e+00 2.60653066e+00 1.00000000e+00 ... 1.26641655e-14\n", + " 1.26641655e-14 1.26641655e-14]\n", + " [2.60653066e+00 6.00000000e+00 1.00000000e+00 ... 1.26641655e-14\n", + " 1.26641655e-14 1.26641655e-14]\n", + " [1.00000000e+00 1.00000000e+00 4.00000000e+00 ... 3.00000000e+00\n", + " 3.00000000e+00 3.00000000e+00]\n", + " ...\n", + " [1.26641655e-14 1.26641655e-14 3.00000000e+00 ... 1.80000000e+01\n", + " 1.30548713e+01 8.19020657e+00]\n", + " [1.26641655e-14 1.26641655e-14 3.00000000e+00 ... 1.30548713e+01\n", + " 2.20000000e+01 9.71901120e+00]\n", + " [1.26641655e-14 1.26641655e-14 3.00000000e+00 ... 
8.19020657e+00\n", + " 9.71901120e+00 1.60000000e+01]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 796.45it/s]\n", " Mean performance on train set: 2.688029\n", "With standard deviation: 1.541623\n", "\n", " Mean performance on test set: 10.099738\n", "With standard deviation: 5.035844\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 745.11it/s]\n", "\n", "\n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 10.0997 5.03584 2.68803 1.54162 0.484171\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - "\n", - " Loading dataset from file...\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- treelet kernel matrix of size 185 built in 0.5003015995025635 seconds ---\n", - "[[ 4.00000000e+00 2.60653066e+00 1.00000000e+00 ..., 1.26641655e-14\n", - " 1.26641655e-14 1.26641655e-14]\n", - " [ 2.60653066e+00 6.00000000e+00 1.00000000e+00 ..., 1.26641655e-14\n", - " 1.26641655e-14 1.26641655e-14]\n", - " [ 1.00000000e+00 1.00000000e+00 4.00000000e+00 ..., 3.00000000e+00\n", - " 3.00000000e+00 3.00000000e+00]\n", - " ..., \n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 1.80000000e+01\n", - " 1.30548713e+01 8.19020657e+00]\n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 1.30548713e+01\n", - " 2.20000000e+01 9.71901120e+00]\n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 8.19020657e+00\n", - " 9.71901120e+00 1.60000000e+01]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on train set: 2.908869\n", - "With standard deviation: 1.267900\n", - "\n", - " Mean performance on test set: 8.307902\n", - "With standard deviation: 3.378376\n", - "\n", - "\n", - " rmse_test std_test rmse_train std_train k_time\n", - "----------- ---------- ------------ ----------- --------\n", - " 8.3079 3.37838 2.90887 
1.2679 0.500302\n" + " 10.0997 5.03584 2.68803 1.54162 0.475438\n" ] } ], @@ -99,8 +63,6 @@ "\n", "kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n", "\n", - "kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = True)\n", - "\n", "# %lprun -f treeletkernel \\\n", "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)" ] @@ -121,14 +83,58 @@ "# without y normalization\n", " RMSE_test std_test RMSE_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 10.0997 5.03584 2.68803 1.54162 0.484171" + " 10.0997 5.03584 2.68803 1.54162 0.484171\n", + "\n", + " \n", + "\n", + "# G0 -> WL subtree h = 0\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 13.9223 2.88611 13.373 0.653301 0.186731\n", + "\n", + "# G0 U G1 U G6 U G8 U G13 -> WL subtree h = 1\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 8.97706 2.90771 6.7343 1.17505 0.223171\n", + " \n", + "# all patterns \\ { G3 U G4 U G5 U G10 } -> WL subtree h = 2 \n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 7.31274 1.96289 3.73909 0.406267 0.294902\n", + "\n", + "# all patterns \\ { G4 U G5 } -> WL subtree h = 3\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 8.39977 2.78309 3.8606 1.58686 0.348912\n", + "\n", + "# all patterns \\ { G5 } \n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 9.47647 4.22113 3.18029 1.5669 0.423638\n", + " \n", + " \n", + " \n", + "# G0, -> WL subtree h = 0\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ 
----------- --------\n", + " 13.9223 2.88611 13.373 0.653301 0.186731 \n", + " \n", + "# G0 U G1 U G2 U G6 U G8 U G13 -> WL subtree h = 1\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 8.62431 2.54327 5.63422 0.255002 0.290797\n", + " \n", + "# all patterns \\ { G5 U G10 } -> WL subtree h = 2\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 10.1294 3.50275 3.69664 1.55116 0.418498" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { - "scrolled": false + "scrolled": true }, "outputs": [ { diff --git a/notebooks/.ipynb_checkpoints/run_treepatternkernel-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_treepatternkernel-checkpoint.ipynb new file mode 100644 index 0000000..a5d9ae6 --- /dev/null +++ b/notebooks/.ipynb_checkpoints/run_treepatternkernel-checkpoint.ipynb @@ -0,0 +1,3191 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a regression problem ---\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-10 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.36956548690796 seconds ---\n", + "[[ 13. 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13. ... 29. 29.\n", + " 29. ]\n", + " ...\n", + " [ 20. 20. 29. ... 365.00000001 365.00000001\n", + " 365.00000001]\n", + " [ 20. 20. 29. ... 365.00000001 365.00000001\n", + " 365.00000001]\n", + " [ 20. 20. 29. ... 
365.00000001 365.00000001\n", + " 365.00000002]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 91%|█████████▏| 914/1000 [00:01<00:00, 751.28it/s]\n", + " Mean performance on train set: 5.993535\n", + "With standard deviation: 0.356922\n", + "\n", + " Mean performance on test set: 7.464904\n", + "With standard deviation: 1.718585\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 795.88it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-09 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.47467517852783 seconds ---\n", + "[[ 13. 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.00000001 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13. ... 29. 29.\n", + " 29. ]\n", + " ...\n", + " [ 20. 20. 29. ... 365.00000015 365.00000015\n", + " 365.00000015]\n", + " [ 20. 20. 29. ... 365.00000015 365.00000015\n", + " 365.00000015]\n", + " [ 20. 20. 29. ... 365.00000015 365.00000015\n", + " 365.0000002 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 654.33it/s]\n", + " Mean performance on train set: 5.963041\n", + "With standard deviation: 0.374107\n", + "\n", + " Mean performance on test set: 7.375105\n", + "With standard deviation: 1.769252\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 711.24it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-08 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.32968211174011 seconds ---\n", + "[[ 13.00000004 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.00000008 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13.00000004 ... 
29.00000004 29.00000004\n", + " 29.00000004]\n", + " ...\n", + " [ 20. 20. 29.00000004 ... 365.00000148 365.00000148\n", + " 365.00000148]\n", + " [ 20. 20. 29.00000004 ... 365.00000148 365.00000148\n", + " 365.00000148]\n", + " [ 20. 20. 29.00000004 ... 365.00000148 365.00000148\n", + " 365.00000202]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 967/1000 [00:01<00:00, 809.48it/s]\n", + " Mean performance on train set: 5.965110\n", + "With standard deviation: 0.378249\n", + "\n", + " Mean performance on test set: 7.350689\n", + "With standard deviation: 1.780556\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 786.78it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-07 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.74151062965393 seconds ---\n", + "[[ 13.0000004 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.0000008 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13.0000004 ... 29.0000004 29.0000004\n", + " 29.0000004]\n", + " ...\n", + " [ 20. 20. 29.0000004 ... 365.0000148 365.0000148\n", + " 365.0000148]\n", + " [ 20. 20. 29.0000004 ... 365.0000148 365.0000148\n", + " 365.0000148]\n", + " [ 20. 20. 29.0000004 ... 
365.0000148 365.0000148\n", + " 365.0000202]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 735.76it/s]\n", + " Mean performance on train set: 5.966982\n", + "With standard deviation: 0.382093\n", + "\n", + " Mean performance on test set: 7.350999\n", + "With standard deviation: 1.781470\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 804.24it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-06 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.52131748199463 seconds ---\n", + "[[ 13.000004 14. 4. ... 20. 20. 20. ]\n", + " [ 14. 20.000008 4. ... 20. 20. 20. ]\n", + " [ 4. 4. 13.000004 ... 29.000004 29.000004 29.000004]\n", + " ...\n", + " [ 20. 20. 29.000004 ... 365.000148 365.000148 365.000148]\n", + " [ 20. 20. 29.000004 ... 365.000148 365.000148 365.000148]\n", + " [ 20. 20. 29.000004 ... 365.000148 365.000148 365.000202]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 970/1000 [00:01<00:00, 759.32it/s]\n", + " Mean performance on train set: 5.969758\n", + "With standard deviation: 0.386318\n", + "\n", + " Mean performance on test set: 7.351225\n", + "With standard deviation: 1.780522\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 783.42it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-05 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.67099857330322 seconds ---\n", + "[[ 13.00004 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.00008 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13.00004 ... 
29.00004 29.00004\n", + " 29.00004 ]\n", + " ...\n", + " [ 20. 20. 29.00004 ... 365.00148001 365.00148001\n", + " 365.00148 ]\n", + " [ 20. 20. 29.00004 ... 365.00148001 365.00148001\n", + " 365.00148 ]\n", + " [ 20. 20. 29.00004 ... 365.00148 365.00148\n", + " 365.00202 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 801.70it/s]\n", + " Mean performance on train set: 5.970557\n", + "With standard deviation: 0.390719\n", + "\n", + " Mean performance on test set: 7.348129\n", + "With standard deviation: 1.780293\n", + "\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 0.0001 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 36.80127692222595 seconds ---\n", + "[[ 13.0004 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.0008 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13.0004 ... 29.0004 29.0004\n", + " 29.0004 ]\n", + " ...\n", + " [ 20. 20. 29.0004 ... 365.01480072 365.01480072\n", + " 365.0148 ]\n", + " [ 20. 20. 29.0004 ... 365.01480072 365.01480072\n", + " 365.0148 ]\n", + " [ 20. 20. 29.0004 ... 
365.0148 365.0148\n", + " 365.0202 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 889.41it/s]\n", + " Mean performance on train set: 5.942495\n", + "With standard deviation: 0.331983\n", + "\n", + " Mean performance on test set: 7.349836\n", + "With standard deviation: 1.781100\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 883.76it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 0.001 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 35.8681423664093 seconds ---\n", + "[[ 13.004 14. 4. ... 20. 20. 20. ]\n", + " [ 14. 20.008 4. ... 20. 20. 20. ]\n", + " [ 4. 4. 13.004 ... 29.004 29.004 29.004 ]\n", + " ...\n", + " [ 20. 20. 29.004 ... 365.148072 365.148072 365.148 ]\n", + " [ 20. 20. 29.004 ... 365.148072 365.148072 365.148 ]\n", + " [ 20. 20. 29.004 ... 365.148 365.148 365.202 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 99%|█████████▉| 988/1000 [00:01<00:00, 886.54it/s]\n", + " Mean performance on train set: 5.933395\n", + "With standard deviation: 0.324965\n", + "\n", + " Mean performance on test set: 7.357745\n", + "With standard deviation: 1.780977\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 888.00it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 0.01 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 36.001843214035034 seconds ---\n", + "[[ 13.04 14. 4. ... 20. 20. 20. ]\n", + " [ 14. 20.08 4. ... 20. 20. 20. ]\n", + " [ 4. 4. 13.04 ... 29.04 29.04 29.04 ]\n", + " ...\n", + " [ 20. 20. 
29.04 ... 366.4872 366.4872 366.48 ]\n", + " [ 20. 20. 29.04 ... 366.4872 366.4872 366.48 ]\n", + " [ 20. 20. 29.04 ... 366.48 366.48 367.02 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 978/1000 [00:01<00:00, 863.94it/s]\n", + " Mean performance on train set: 5.940695\n", + "With standard deviation: 0.347431\n", + "\n", + " Mean performance on test set: 7.374269\n", + "With standard deviation: 1.791145\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 878.96it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 0.1 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 36.37146854400635 seconds ---\n", + "[[ 13.4 14. 4. ... 20. 20. 20. ]\n", + " [ 14. 20.8 4. ... 20. 20. 20. ]\n", + " [ 4. 4. 13.4 ... 29.4 29.4 29.4 ]\n", + " ...\n", + " [ 20. 20. 29.4 ... 380.52 380.52 379.8 ]\n", + " [ 20. 20. 29.4 ... 380.52 380.52 379.8 ]\n", + " [ 20. 20. 29.4 ... 379.8 379.8 385.2 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 993/1000 [00:01<00:00, 860.40it/s]\n", + " Mean performance on train set: 6.427114\n", + "With standard deviation: 1.293674\n", + "\n", + " Mean performance on test set: 7.329299\n", + "With standard deviation: 1.913634\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 883.01it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.83972358703613 seconds ---\n", + "[[ 17. 14. 4. ... 20. 20. 20.]\n", + " [ 14. 28. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 17. ... 33. 33. 33.]\n", + " ...\n", + " [ 20. 20. 33. ... 585. 585. 
513.]\n", + " [ 20. 20. 33. ... 585. 585. 513.]\n", + " [ 20. 20. 33. ... 513. 513. 567.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 979/1000 [00:01<00:00, 616.77it/s]\n", + " Mean performance on train set: 6.624254\n", + "With standard deviation: 1.224196\n", + "\n", + " Mean performance on test set: 7.271336\n", + "With standard deviation: 2.207735\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 630.82it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 10.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.50818395614624 seconds ---\n", + "[[5.300e+01 1.400e+01 4.000e+00 ... 2.000e+01 2.000e+01 2.000e+01]\n", + " [1.400e+01 1.000e+02 4.000e+00 ... 2.000e+01 2.000e+01 2.000e+01]\n", + " [4.000e+00 4.000e+00 5.300e+01 ... 6.900e+01 6.900e+01 6.900e+01]\n", + " ...\n", + " [2.000e+01 2.000e+01 6.900e+01 ... 9.045e+03 9.045e+03 1.845e+03]\n", + " [2.000e+01 2.000e+01 6.900e+01 ... 9.045e+03 9.045e+03 1.845e+03]\n", + " [2.000e+01 2.000e+01 6.900e+01 ... 1.845e+03 1.845e+03 2.385e+03]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 8%|▊ | 77/1000 [00:00<00:01, 764.71it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.069543502626658e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1303298666315776e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.70249458866672e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.63992169055093e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.438093960487116e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0002169262936346e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.5920339281975188e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5874866272574162e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.0599424240471626e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.468773818521402e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.958334441043603e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 27%|██▋ | 267/1000 [00:00<00:01, 654.82it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/sklearn/linear_model/ridge.py:154: UserWarning: Singular matrix in solving dual problem. Using least-squares solution instead.\n", + " warnings.warn(\"Singular matrix in solving dual problem. Using \"\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.055618175730539e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.7159074038024934e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.895455126720251e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.400306511546424e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.206478316049589e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8083631222444177e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.49051280863482e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.339852738992424e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.277544863160196e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.458523723353626e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 48%|████▊ | 477/1000 [00:00<00:00, 685.70it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6380760737666547e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.3843421259537676e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.890544546973404e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.054758730954765e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.9172765626494813e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4455093698440067e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2914256710839066e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.65667341282596e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.852926745577629e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.285092924342139e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 69%|██████▉ | 694/1000 [00:01<00:00, 712.57it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.700250453064005e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6205193931367065e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.4925504318417794e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.0111387119813346e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.343123723749221e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0143662852277667e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.206690575125046e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.049999246995425e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.4232350203422674e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.350008400303505e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.251763015291957e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 90%|█████████ | 902/1000 [00:01<00:00, 666.85it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.924869742342744e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4010401637647583e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.5117924740400373e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.626753798403599e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.911227588173856e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0660043401009468e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3099139652029694e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.2680602391853274e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.396574210735164e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 97%|█████████▋| 969/1000 [00:01<00:00, 600.11it/s]\n", + " Mean performance on train set: 6.816974\n", + "With standard deviation: 1.501822\n", + "\n", + " Mean performance on test set: 7.497870\n", + "With standard deviation: 2.368148\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 648.87it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 100.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.09455919265747 seconds ---\n", + "[[4.13000e+02 1.40000e+01 4.00000e+00 ... 2.00000e+01 2.00000e+01\n", + " 2.00000e+01]\n", + " [1.40000e+01 8.20000e+02 4.00000e+00 ... 2.00000e+01 2.00000e+01\n", + " 2.00000e+01]\n", + " [4.00000e+00 4.00000e+00 4.13000e+02 ... 4.29000e+02 4.29000e+02\n", + " 4.29000e+02]\n", + " ...\n", + " [2.00000e+01 2.00000e+01 4.29000e+02 ... 7.35165e+05 7.35165e+05\n", + " 1.51650e+04]\n", + " [2.00000e+01 2.00000e+01 4.29000e+02 ... 7.35165e+05 7.35165e+05\n", + " 1.51650e+04]\n", + " [2.00000e+01 2.00000e+01 4.29000e+02 ... 1.51650e+04 1.51650e+04\n", + " 2.05650e+04]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 10%|▉ | 97/1000 [00:00<00:02, 436.93it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.249229588791739e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6092761314568358e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6033357497241564e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0333531111165975e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.073851980749357e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.497880470461594e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0349745182117167e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 17%|█▋ | 169/1000 [00:00<00:02, 372.64it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.227908145504113e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1894008132724887e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9131474526752795e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0421770253846576e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.864916618602575e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.663676730244888e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 31%|███▏ | 314/1000 [00:00<00:01, 429.29it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.6989200751598342e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9988176582222278e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.173259131422707e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.069621878854856e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.087601566853754e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.1827598831940232e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.428244298929586e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1120965359644164e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8329496119281176e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.5128147762765525e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.173155329882729e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 50%|█████ | 501/1000 [00:01<00:00, 516.57it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.874621241781873e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9047959204426696e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.03000007539236e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.8198639503150797e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.679200342495213e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6211471280327221e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.74280825574767e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.685372827008377e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9723233156997277e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.142362330339379e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.0042133764798303e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.987833375253946e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 71%|███████ | 706/1000 [00:01<00:00, 471.97it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.63949546549065e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.76170805410039e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.3721058293845662e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.178277242767302e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.4883373934010664e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.526360275338589e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.827383891217367e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 81%|████████ | 807/1000 [00:01<00:00, 457.30it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.052622499085628e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.70793549450487e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8190124240850417e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.349104192126423e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.132340452050677e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 91%|█████████ | 909/1000 [00:01<00:00, 451.90it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.78695142234395e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.8765535280551442e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.9917255115528226e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.781650263544808e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.581768670551366e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0493867289518776e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.9787087068181396e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.756012232435961e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.800283208793992e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.468606690086715e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.119459703249427e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 96%|█████████▌| 956/1000 [00:02<00:00, 445.96it/s]\n", + " Mean performance on train set: 6.687664\n", + "With standard deviation: 1.348089\n", + "\n", + " Mean performance on test set: 7.428867\n", + "With standard deviation: 2.647892\n", + "calculate performance: 100%|██████████| 1000/1000 [00:02<00:00, 467.65it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.517051219940186 seconds ---\n", + "[[4.0130000e+03 1.4000000e+01 4.0000000e+00 ... 2.0000000e+01\n", + " 2.0000000e+01 2.0000000e+01]\n", + " [1.4000000e+01 8.0200000e+03 4.0000000e+00 ... 2.0000000e+01\n", + " 2.0000000e+01 2.0000000e+01]\n", + " [4.0000000e+00 4.0000000e+00 4.0130000e+03 ... 4.0290000e+03\n", + " 4.0290000e+03 4.0290000e+03]\n", + " ...\n", + " [2.0000000e+01 2.0000000e+01 4.0290000e+03 ... 7.2148365e+07\n", + " 7.2148365e+07 1.4836500e+05]\n", + " [2.0000000e+01 2.0000000e+01 4.0290000e+03 ... 7.2148365e+07\n", + " 7.2148365e+07 1.4836500e+05]\n", + " [2.0000000e+01 2.0000000e+01 4.0290000e+03 ... 1.4836500e+05\n", + " 1.4836500e+05 2.0236500e+05]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 10%|█ | 102/1000 [00:00<00:02, 330.46it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.374017095746491e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.687507275679712e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.7645414168071277e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8090152927008474e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.474194561968185e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.130856876335615e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 19%|█▉ | 188/1000 [00:00<00:02, 348.40it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.7794644716713837e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.3130943734340723e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.7505566440337117e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3279670378456666e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.318466984022222e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.418445519765442e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 28%|██▊ | 285/1000 [00:00<00:01, 380.47it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.1356199672921913e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6464620684950592e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.179430869121561e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.473887201835687e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.570987725305032e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.854640029504099e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 38%|███▊ | 385/1000 [00:00<00:01, 389.24it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3781828327775562e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.955507306233033e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.109947315270106e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.9378280303294975e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.866942525478256e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 49%|████▉ | 494/1000 [00:01<00:01, 398.83it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.636014964778956e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.6552163232757833e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.312233993243073e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.2794244316598437e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.406028628818668e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 60%|██████ | 604/1000 [00:01<00:00, 407.30it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.184686103929999e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.1281587780183657e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.367013528660628e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.155116554595105e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.443029464120917e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.4923079446940085e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.750703061909557e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 71%|███████ | 707/1000 [00:01<00:00, 402.63it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4970615125032324e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.39494402062226e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.818999641865095e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.051275910233908e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.688197813410084e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 81%|████████ | 806/1000 [00:01<00:00, 404.11it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.43659662072146e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.4284104102664825e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.900304923444742e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.1112688931900636e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.941184656304436e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.746476431972804e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 91%|█████████ | 906/1000 [00:02<00:00, 407.57it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.327409345420052e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0589341144557062e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.29154188313992e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.225020130252359e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.327720201864263e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 95%|█████████▌| 951/1000 [00:02<00:00, 373.87it/s]\n", + " Mean performance on train set: 6.819058\n", + "With standard deviation: 1.410085\n", + "\n", + " Mean performance on test set: 7.249143\n", + "With standard deviation: 2.655536\n", + "calculate performance: 100%|██████████| 1000/1000 [00:02<00:00, 414.03it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.193651783291256e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.19984276961351e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0689487149937185e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.8956869823870564e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.790887064559792e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 10000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.01269268989563 seconds ---\n", + "[[4.00130000e+04 1.40000000e+01 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00200000e+04 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00130000e+04 ... 4.00290000e+04\n", + " 4.00290000e+04 4.00290000e+04]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00290000e+04 ... 7.20148036e+09\n", + " 7.20148036e+09 1.48036500e+06]\n", + " [2.00000000e+01 2.00000000e+01 4.00290000e+04 ... 7.20148036e+09\n", + " 7.20148036e+09 1.48036500e+06]\n", + " [2.00000000e+01 2.00000000e+01 4.00290000e+04 ... 1.48036500e+06\n", + " 1.48036500e+06 2.02036500e+06]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 11%|█▏ | 114/1000 [00:00<00:02, 303.84it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6978549111114387e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1828172674052679e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.547017114313022e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.990786470945978e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.8300974250606965e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.727582936838558e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 21%|██ | 208/1000 [00:00<00:02, 342.60it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.569354559683504e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.885865920757663e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.006883677471783e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2491482969368813e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.596699463334369e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.74024827114128e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0002891123411383e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 31%|███ | 311/1000 [00:00<00:01, 371.07it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.478112292477647e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.3626069490970097e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.751107853461428e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.007160521891646e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.592912372477283e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 41%|████▏ | 414/1000 [00:01<00:01, 394.40it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.286966904954778e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.330065717080935e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0639972759819077e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3250852692883386e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.32507703774876e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.514317701906229e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 51%|█████▏ | 514/1000 [00:01<00:01, 411.17it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.5186106207732215e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2309907116861648e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.582391203608073e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.638503426307468e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0580872068659216e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 62%|██████▏ | 615/1000 [00:01<00:00, 424.96it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.633316922861427e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.253166879599146e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4688155478710103e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.3252803222906435e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.713726552669558e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.934042177466841e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.468592067289146e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 72%|███████▏ | 716/1000 [00:01<00:00, 433.84it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.66227030386163e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5342528466878185e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.620745355332433e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5797394734563764e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.109079490079661e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.548426603146641e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0484899604694826e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 82%|████████▏ | 816/1000 [00:01<00:00, 439.46it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.080827240476694e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.8279322282295696e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0410721959495632e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.343375151645726e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.351177509861134e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.370583478449445e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 92%|█████████▏| 916/1000 [00:02<00:00, 437.07it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.3677978521118296e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0227997187914302e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.557088603475233e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.657645350184021e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.990628121216557e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 96%|█████████▌| 962/1000 [00:02<00:00, 393.51it/s]\n", + " Mean performance on train set: 6.934306\n", + "With standard deviation: 1.384412\n", + "\n", + " Mean performance on test set: 7.081832\n", + "With standard deviation: 2.624800\n", + "calculate performance: 100%|██████████| 1000/1000 [00:02<00:00, 420.16it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.2964047050969517e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.089708488833387e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3234351109582e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.293818265362604e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.44555950404844e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 100000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 35.835275411605835 seconds ---\n", + "[[4.0001300e+05 1.4000000e+01 4.0000000e+00 ... 2.0000000e+01\n", + " 2.0000000e+01 2.0000000e+01]\n", + " [1.4000000e+01 8.0002000e+05 4.0000000e+00 ... 
2.0000000e+01\n", + " 2.0000000e+01 2.0000000e+01]\n", + " [4.0000000e+00 4.0000000e+00 4.0001300e+05 ... 4.0002900e+05\n", + " 4.0002900e+05 4.0002900e+05]\n", + " ...\n", + " [2.0000000e+01 2.0000000e+01 4.0002900e+05 ... 7.2001480e+11\n", + " 7.2001480e+11 1.4800365e+07]\n", + " [2.0000000e+01 2.0000000e+01 4.0002900e+05 ... 7.2001480e+11\n", + " 7.2001480e+11 1.4800365e+07]\n", + " [2.0000000e+01 2.0000000e+01 4.0002900e+05 ... 1.4800365e+07\n", + " 1.4800365e+07 2.0200365e+07]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 13%|█▎ | 126/1000 [00:00<00:03, 261.95it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0191112815027622e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.232307074954237e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.097885039345644e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.727869526025791e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 21%|██ | 208/1000 [00:00<00:03, 262.44it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2411064955279154e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.149899322677468e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.642478117522784e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 30%|███ | 305/1000 [00:01<00:02, 265.15it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.785067039039337e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.225934530879337e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.562790184640486e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 41%|████▏ | 414/1000 [00:01<00:01, 297.05it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.135758789917749e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.564182826681079e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.542436590136228e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 50%|█████ | 500/1000 [00:01<00:01, 319.27it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.195315905739342e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.379873880613873e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.630975349505465e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.518071272961898e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 61%|██████▏ | 613/1000 [00:02<00:01, 303.76it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.9460047081015216e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.839254463570563e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.318583704180543e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 73%|███████▎ | 728/1000 [00:02<00:00, 300.34it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.3984244112232524e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3291156014339405e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.707621088224988e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0690013288979288e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 81%|████████▏ | 813/1000 [00:02<00:00, 321.97it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.315773170165585e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.138544398203078e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.386508127676171e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 89%|████████▉ | 891/1000 [00:02<00:00, 317.18it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.6842499515474312e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.528434456947986e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.856433851414765e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 96%|█████████▌| 957/1000 [00:03<00:00, 259.36it/s]\n", + " Mean performance on train set: 9.394995\n", + "With standard deviation: 1.047066\n", + "\n", + " Mean performance on test set: 8.237631\n", + "With standard deviation: 3.665300\n", + "calculate performance: 100%|██████████| 1000/1000 [00:03<00:00, 299.25it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.056791553686018e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.644703618966645e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.15242050721053e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.77732253074646 seconds ---\n", + "[[4.00001300e+06 1.40000000e+01 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00002000e+06 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00001300e+06 ... 4.00002900e+06\n", + " 4.00002900e+06 4.00002900e+06]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00002900e+06 ... 7.20001480e+13\n", + " 7.20001480e+13 1.48000365e+08]\n", + " [2.00000000e+01 2.00000000e+01 4.00002900e+06 ... 7.20001480e+13\n", + " 7.20001480e+13 1.48000365e+08]\n", + " [2.00000000e+01 2.00000000e+01 4.00002900e+06 ... 1.48000365e+08\n", + " 1.48000365e+08 2.02000365e+08]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 14%|█▎ | 135/1000 [00:00<00:03, 253.06it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.741195162637844e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.5965964498458038e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.983361443347492e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.7922291165206923e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.685526054240851e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 23%|██▎ | 230/1000 [00:00<00:02, 265.75it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6848871984797616e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.8811603375005575e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.7514073450053307e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5976314128410034e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.790988115471154e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.477178586927344e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.035215871851048e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 33%|███▎ | 332/1000 [00:01<00:02, 277.35it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.287317576627726e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.886325173924881e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8372812242318245e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.523017625167697e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.309735186090854e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 43%|████▎ | 431/1000 [00:01<00:02, 258.66it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.2092887442597021e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.444285941342485e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6650957319224102e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.778343353100153e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.067496740668901e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.832366528737191e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 53%|█████▎ | 534/1000 [00:01<00:01, 252.20it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2368084459111367e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.625050966790768e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.786992563738048e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5350081600164477e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.2823286559994256e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.880362578796432e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1100444346816681e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 64%|██████▎ | 637/1000 [00:02<00:01, 260.77it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.1335408080135885e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4688002952482946e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.6678722631357644e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.288251804550535e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.895017980474164e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 74%|███████▍ | 741/1000 [00:02<00:00, 276.66it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.209157188853246e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4519382674684447e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.5748326244710203e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.852436562697074e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.536506069365062e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.969036612017947e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 81%|████████▏ | 813/1000 [00:02<00:00, 297.62it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.309383987394578e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.565067260805818e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.516194197490843e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.041064934861363e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.499601220689098e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.051151158798192e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 92%|█████████▏| 915/1000 [00:03<00:00, 298.37it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.7472428919435347e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.703770718809819e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2818242346374262e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.102554831016506e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.599188283622467e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0647003359802031e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 98%|█████████▊| 977/1000 [00:03<00:00, 278.57it/s]\n", + " Mean performance on train set: 9.635245\n", + "With standard deviation: 0.687560\n", + "\n", + " Mean performance on test set: 8.529828\n", + "With standard deviation: 3.580591\n", + "calculate performance: 100%|██████████| 1000/1000 [00:03<00:00, 284.18it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 10000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.502409510736216e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0439081147173944e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.4602149061556115e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.9479003259500843e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.34621741763787e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0245542507930726e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.46095633506775 seconds ---\n", + "[[4.00000130e+07 1.40000000e+01 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00000200e+07 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00000130e+07 ... 4.00000290e+07\n", + " 4.00000290e+07 4.00000290e+07]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00000290e+07 ... 7.20000148e+15\n", + " 7.20000148e+15 1.48000036e+09]\n", + " [2.00000000e+01 2.00000000e+01 4.00000290e+07 ... 7.20000148e+15\n", + " 7.20000148e+15 1.48000036e+09]\n", + " [2.00000000e+01 2.00000000e+01 4.00000290e+07 ... 
1.48000036e+09\n", + " 1.48000036e+09 2.02000036e+09]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 12%|█▏ | 122/1000 [00:00<00:03, 231.16it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.200122265640733e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.627297027618617e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.893611596005168e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.1052758523976415e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.158312302718829e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.675650470006637e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 23%|██▎ | 227/1000 [00:00<00:03, 253.57it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.2230286797750079e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.0790289882373515e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6170735381557016e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.90989130271085e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.4735492461286675e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.544548737558878e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 34%|███▍ | 344/1000 [00:01<00:02, 274.36it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0569387542185164e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.8929851773550792e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.762490810329375e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.031994076021703e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.380795983197089e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 44%|████▎ | 437/1000 [00:01<00:02, 281.37it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.159845331824398e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.592173545119868e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0031182975801337e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.315102488294594e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.427690586128571e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.36815306216013e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 52%|█████▎ | 525/1000 [00:01<00:01, 277.96it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.096509347533013e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2037051514926243e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.641239932076709e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.911654384898199e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.195245830759744e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 61%|██████ | 606/1000 [00:02<00:01, 236.45it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.179739003035368e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.121167134816686e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.201750997217992e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.6143396276266097e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.876412427833992e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.158416120381036e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 73%|███████▎ | 727/1000 [00:02<00:01, 211.65it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.355737066205781e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.778261189640049e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.992328403436096e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.166526583123927e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.566565138343654e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.272056103177315e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 82%|████████▏ | 822/1000 [00:03<00:00, 223.94it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.2435735437353417e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.471379508084743e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.686971341479104e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.827108937015577e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 93%|█████████▎| 931/1000 [00:03<00:00, 255.06it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.2357280081107672e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.559072238694825e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.45488311322705e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.6616151717441874e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.807701664283496e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 98%|█████████▊| 983/1000 [00:04<00:00, 220.81it/s]\n", + " Mean performance on train set: 11.059074\n", + "With standard deviation: 1.323635\n", + "\n", + " Mean performance on test set: 10.964175\n", + "With standard deviation: 3.358726\n", + "calculate performance: 100%|██████████| 1000/1000 [00:04<00:00, 242.35it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 100000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.749823872976888e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.347208969568296e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9981807042259307e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.298232968104139e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.391381083354749e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.328796748008544e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.91001057624817 seconds ---\n", + "[[4.00000013e+08 1.40000000e+01 4.00000000e+00 ... 
2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00000020e+08 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00000013e+08 ... 4.00000029e+08\n", + " 4.00000029e+08 4.00000029e+08]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00000029e+08 ... 7.20000015e+17\n", + " 7.20000015e+17 1.48000004e+10]\n", + " [2.00000000e+01 2.00000000e+01 4.00000029e+08 ... 7.20000015e+17\n", + " 7.20000015e+17 1.48000004e+10]\n", + " [2.00000000e+01 2.00000000e+01 4.00000029e+08 ... 1.48000004e+10\n", + " 1.48000004e+10 2.02000004e+10]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 992/1000 [00:03<00:00, 267.91it/s]\n", + " Mean performance on train set: 66.147687\n", + "With standard deviation: 11.979989\n", + "\n", + " Mean performance on test set: 63.221208\n", + "With standard deviation: 13.381090\n", + "calculate performance: 100%|██████████| 1000/1000 [00:03<00:00, 252.05it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1000000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 38.43676400184631 seconds ---\n", + "[[4.00000001e+09 1.40000000e+01 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00000002e+09 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00000001e+09 ... 4.00000003e+09\n", + " 4.00000003e+09 4.00000003e+09]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00000003e+09 ... 7.20000001e+19\n", + " 7.20000001e+19 1.48000000e+11]\n", + " [2.00000000e+01 2.00000000e+01 4.00000003e+09 ... 7.20000001e+19\n", + " 7.20000001e+19 1.48000000e+11]\n", + " [2.00000000e+01 2.00000000e+01 4.00000003e+09 ... 
1.48000000e+11\n", + " 1.48000000e+11 2.02000000e+11]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 976/1000 [00:04<00:00, 268.70it/s]\n", + " Mean performance on train set: 96.664827\n", + "With standard deviation: 1.871320\n", + "\n", + " Mean performance on test set: 100.134704\n", + "With standard deviation: 13.845906\n", + "calculate performance: 100%|██████████| 1000/1000 [00:04<00:00, 236.90it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 10000000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.32151246070862 seconds ---\n", + "[[4.00e+10 1.40e+01 4.00e+00 ... 2.00e+01 2.00e+01 2.00e+01]\n", + " [1.40e+01 8.00e+10 4.00e+00 ... 2.00e+01 2.00e+01 2.00e+01]\n", + " [4.00e+00 4.00e+00 4.00e+10 ... 4.00e+10 4.00e+10 4.00e+10]\n", + " ...\n", + " [2.00e+01 2.00e+01 4.00e+10 ... 7.20e+21 7.20e+21 1.48e+12]\n", + " [2.00e+01 2.00e+01 4.00e+10 ... 7.20e+21 7.20e+21 1.48e+12]\n", + " [2.00e+01 2.00e+01 4.00e+10 ... 
1.48e+12 1.48e+12 2.02e+12]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|█████████▉| 997/1000 [00:03<00:00, 268.42it/s]\n", + " Mean performance on train set: 98.175092\n", + "With standard deviation: 4.720613\n", + "\n", + " Mean performance on test set: 100.144883\n", + "With standard deviation: 13.958659\n", + "calculate performance: 100%|██████████| 1000/1000 [00:03<00:00, 260.49it/s]\n", + "\n", + "\n", + " lmda rmse_test std_test rmse_train std_train k_time\n", + "----------- ----------- ---------- ------------ ----------- --------\n", + " 1e-10 7.4649 1.71858 5.99354 0.356922 37.3696\n", + " 1e-09 7.37511 1.76925 5.96304 0.374107 37.4747\n", + " 1e-08 7.35069 1.78056 5.96511 0.378249 37.3297\n", + " 1e-07 7.351 1.78147 5.96698 0.382093 37.7415\n", + " 1e-06 7.35123 1.78052 5.96976 0.386318 37.5213\n", + " 1e-05 7.34813 1.78029 5.97056 0.390719 37.671\n", + " 0.0001 7.34984 1.7811 5.9425 0.331983 36.8013\n", + " 0.001 7.35775 1.78098 5.9334 0.324965 35.8681\n", + " 0.01 7.37427 1.79115 5.94069 0.347431 36.0018\n", + " 0.1 7.3293 1.91363 6.42711 1.29367 36.3715\n", + " 1 7.27134 2.20774 6.62425 1.2242 37.8397\n", + " 10 7.49787 2.36815 6.81697 1.50182 37.5082\n", + " 100 7.42887 2.64789 6.68766 1.34809 37.0946\n", + " 1000 7.24914 2.65554 6.81906 1.41008 37.5171\n", + " 10000 7.08183 2.6248 6.93431 1.38441 37.0127\n", + "100000 8.23763 3.6653 9.395 1.04707 35.8353\n", + " 1e+06 8.52983 3.58059 9.63525 0.68756 37.7773\n", + " 1e+07 10.9642 3.35873 11.0591 1.32363 37.461\n", + " 1e+08 63.2212 13.3811 66.1477 11.98 37.91\n", + " 1e+09 100.135 13.8459 96.6648 1.87132 38.4368\n", + " 1e+10 100.145 13.9587 98.1751 4.72061 37.3215\n" + ] + } + ], + "source": [ + "# tree pattern kernel, dataset acyclic.\n", + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.treePatternKernel import treepatternkernel\n", + 
"\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", + "\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, \\\n", + " kernel_type = 'untiln', h = 2)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, treepatternkernel, kernel_para, \\\n", + " hyper_name = 'lmda', hyper_range = np.logspace(-10, 10, num = 21, base = 10), \\\n", + " normalize = False, model_type = 'regression')\n", + "\n", + "# kernel_para['depth'] = 10\n", + "# %lprun -f untildpathkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# results\n", + "\n", + "# untiln kernel when h = 2\n", + " lmda rmse_test std_test rmse_train std_train k_time\n", + "----------- ----------- ---------- ------------ ----------- --------\n", + " 1e-10 7.46524 1.71862 5.99486 0.356634 38.1447\n", + " 1e-09 7.37326 1.77195 5.96155 0.374395 37.4921\n", + " 1e-08 7.35105 1.78349 5.96481 0.378047 37.9971\n", + " 1e-07 7.35213 1.77903 5.96728 0.382251 38.3182\n", + " 1e-06 7.3524 1.77992 5.9696 0.3863 39.6428\n", + " 1e-05 7.34958 1.78141 5.97114 0.39017 37.3711\n", + " 0.0001 7.3513 1.78136 5.94251 0.331843 37.3967\n", + " 0.001 7.35822 1.78119 5.9326 0.32534 36.7357\n", + " 0.01 7.37552 1.79037 5.94089 0.34763 36.8864\n", + " 0.1 7.32951 1.91346 6.42634 1.29405 36.8382\n", + " 1 7.27134 2.20774 6.62425 1.2242 37.2425\n", + " 10 7.49787 2.36815 6.81697 1.50182 37.8286\n", + " 100 7.42887 2.64789 6.68766 1.34809 36.3701\n", + " 1000 7.24914 2.65554 6.81906 1.41008 36.1695\n", + " 10000 7.08183 2.6248 6.93431 1.38441 37.5723\n", + "100000 8.021 3.43694 8.69813 0.909839 37.8158\n", + " 1e+06 8.49625 3.6332 9.59333 0.96626 38.4688\n", + " 1e+07 10.9067 3.17593 11.5642 2.07792 
36.9926\n", + " 1e+08 61.1524 10.4355 65.3527 13.9538 37.1321\n", + " 1e+09 99.943 13.6994 98.8848 5.27014 36.7443\n", + " 1e+10 100.083 13.8503 97.9168 3.22768 37.096\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The line_profiler extension is already loaded. To reload it, use:\n", + " %reload_ext line_profiler\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, {'atom': 'C', 'label': 'C'}), (1, {'atom': 'C', 'label': 'C'}), (2, {'atom': 'C', 'label': 'C'}), (3, {'atom': 'C', 'label': 'C'}), (4, {'atom': 'S', 'label': 'S'})]\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 39 built in 3.5270774364471436 seconds ---\n", + "(array([[1.99007809e+036, 4.00000000e+000, 4.00000000e+000, ...,\n", + " 1.00000000e+001, 1.00000000e+001, 1.00000000e+001],\n", + " [4.00000000e+000, 6.37886713e+019, 4.34000000e+002, ...,\n", + " 6.37886713e+019, 6.37886713e+019, 6.37886713e+019],\n", + " [4.00000000e+000, 4.34000000e+002, 1.99007809e+036, ...,\n", + " 4.40000000e+002, 4.40000000e+002, 4.40000000e+002],\n", + " ...,\n", + " [1.00000000e+001, 6.37886713e+019, 4.40000000e+002, ...,\n", + " 2.94561201e+119, 1.16903692e+080, 4.42354433e+082],\n", + " [1.00000000e+001, 6.37886713e+019, 4.40000000e+002, ...,\n", + " 1.16903692e+080, 4.21212139e+264, 1.66634383e+080],\n", + " [1.00000000e+001, 6.37886713e+019, 4.40000000e+002, ...,\n", + " 4.42354433e+082, 1.66634383e+080, 5.17763068e+117]]), 3.5270774364471436)\n" + ] + } + ], + "source": [ + "%load_ext line_profiler\n", + "\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "from pygraph.utils.utils 
import kernel_train_test\n", + "\n", + "from pygraph.kernels.treePatternKernel import treepatternkernel, _treepatternkernel_do\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "\n", + "dataset, y = loadDataset(datafile)\n", + "G1 = dataset[100]\n", + "G2 = dataset[20]\n", + "data = [G1, G2]\n", + "# nx.draw_networkx(G1)\n", + "# plt.show()\n", + "# print(G1.nodes(data=True)20\n", + "nx.draw_networkx(G2)\n", + "plt.show()\n", + "print(G2.nodes(data=True))\n", + "\n", + "\n", + "%lprun -f _treepatternkernel_do \\\n", + " kernel = treepatternkernel(dataset[1:40], node_label = 'atom', edge_label = 'bond_type', labeled = True, \\\n", + " kernel_type = 'untiln', lmda = 1, h = 10)\n", + "\n", + "print(kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 185/185 [00:00<00:00, 2064.69it/s]\n", + "calculate kernels: 100%|██████████| 185/185 [00:00<00:00, 11170.00it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 185 built in 0.10836505889892578 seconds ---\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|██████████| 1000/1000 [00:24<00:00, 36.41it/s]\n", + " Mean performance on train set: 0.018072\n", + "With standard deviation: 0.000000\n", + "\n", + " Mean performance on test set: 0.000000\n", + "With standard deviation: 0.000000\n", + "\n", + "\n", + " accur_test std_test accur_train std_train k_time\n", + "------------ ---------- ------------- ----------- --------\n", + " 0 0 0.0180723 0 0.108365\n" + ] + } + ], + "source": [ + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n", + "\n", + "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " normalize = False , model_type = 'classification')\n", + "\n", + "# kernel_para['k_func'] = 'minmax'\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = True)\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "# # kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)\n", + "\n", + "# kernel_para['depth'] = 10\n", + "# %lprun -f untildpathkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)" + 
] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/.ipynb_checkpoints/run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb index 08ef22a..0dbb8f9 100644 --- a/notebooks/.ipynb_checkpoints/run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb @@ -2,300 +2,316 @@ "cells": [ { "cell_type": "code", - "execution_count": 12, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The line_profiler extension is already loaded. To reload it, use:\n", - " %reload_ext line_profiler\n", "\n", " --- This is a regression problem ---\n", "\n", "\n", - " #--- calculating kernel matrix when height = 0 ---#\n", + " #--- calculating kernel matrix when height = 0.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.09183931350708 seconds ---\n", - "[[ 5. 6. 4. ..., 20. 20. 20.]\n", - " [ 6. 8. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 5. ..., 21. 21. 21.]\n", - " ..., \n", - " [ 20. 20. 21. ..., 101. 101. 101.]\n", - " [ 20. 20. 21. ..., 101. 101. 101.]\n", - " [ 20. 20. 21. ..., 101. 101. 101.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3646550178527832 seconds ---\n", + "[[ 5. 6. 4. ... 20. 20. 20.]\n", + " [ 6. 8. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 5. 
... 21. 21. 21.]\n", + " ...\n", + " [ 20. 20. 21. ... 101. 101. 101.]\n", + " [ 20. 20. 21. ... 101. 101. 101.]\n", + " [ 20. 20. 21. ... 101. 101. 101.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 664.77it/s]\n", " Mean performance on train set: 17.681582\n", "With standard deviation: 0.713183\n", "\n", " Mean performance on test set: 15.685879\n", "With standard deviation: 4.139197\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 681.36it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 1 ---#\n", + " #--- calculating kernel matrix when height = 1.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.1553099155426025 seconds ---\n", - "[[ 10. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 16. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 10. ..., 22. 22. 24.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 24. ..., 122. 122. 154.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.7535510063171387 seconds ---\n", + "[[ 10. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 16. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 10. ... 22. 22. 24.]\n", + " ...\n", + " [ 20. 20. 22. ... 130. 130. 122.]\n", + " [ 20. 20. 22. ... 130. 130. 122.]\n", + " [ 20. 20. 24. ... 122. 122. 
154.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▍| 945/1000 [00:01<00:00, 713.00it/s]\n", " Mean performance on train set: 6.270014\n", "With standard deviation: 0.654734\n", "\n", " Mean performance on test set: 7.550458\n", "With standard deviation: 2.331786\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 719.46it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 2 ---#\n", + " #--- calculating kernel matrix when height = 2.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.368663787841797 seconds ---\n", - "[[ 15. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 24. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 15. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 159. 151. 124.]\n", - " [ 20. 20. 22. ..., 151. 153. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 185.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.3278343677520752 seconds ---\n", + "[[ 15. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 24. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 15. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 159. 151. 124.]\n", + " [ 20. 20. 22. ... 151. 153. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
185.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 949/1000 [00:01<00:00, 736.38it/s]\n", " Mean performance on train set: 4.450682\n", "With standard deviation: 0.882129\n", "\n", " Mean performance on test set: 9.728466\n", "With standard deviation: 2.057669\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 709.22it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 3 ---#\n", + " #--- calculating kernel matrix when height = 3.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.480065822601318 seconds ---\n", - "[[ 20. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 32. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 20. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 188. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 168. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 202.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.7653727531433105 seconds ---\n", + "[[ 20. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 32. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 20. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 188. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 168. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
202.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 96%|█████████▌| 959/1000 [00:01<00:00, 724.60it/s]\n", " Mean performance on train set: 2.270586\n", "With standard deviation: 0.481516\n", "\n", " Mean performance on test set: 11.296110\n", "With standard deviation: 2.799944\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 670.29it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 4 ---#\n", + " #--- calculating kernel matrix when height = 4.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 6.052642822265625 seconds ---\n", - "[[ 25. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 40. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 25. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 217. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 183. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 213.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.2821996212005615 seconds ---\n", + "[[ 25. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 40. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 25. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 217. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 183. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
213.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 709.28it/s]\n", " Mean performance on train set: 1.074035\n", "With standard deviation: 0.637823\n", "\n", " Mean performance on test set: 12.808303\n", "With standard deviation: 3.446939\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 646.12it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 5 ---#\n", + " #--- calculating kernel matrix when height = 5.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 7.731367111206055 seconds ---\n", - "[[ 30. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 48. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 30. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 246. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 198. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 224.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.706934928894043 seconds ---\n", + "[[ 30. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 48. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 30. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 246. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 198. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
224.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 953/1000 [00:01<00:00, 553.49it/s]\n", " Mean performance on train set: 0.700602\n", "With standard deviation: 0.572640\n", "\n", " Mean performance on test set: 14.017923\n", "With standard deviation: 3.675042\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 621.01it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 6 ---#\n", + " #--- calculating kernel matrix when height = 6.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 8.005469560623169 seconds ---\n", - "[[ 35. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 56. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 35. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 275. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 213. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 235.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.1140964031219482 seconds ---\n", + "[[ 35. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 56. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 35. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 275. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 213. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
235.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|█████████▉| 997/1000 [00:01<00:00, 595.50it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 14.918434\n", "With standard deviation: 3.805352\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 586.05it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 7 ---#\n", + " #--- calculating kernel matrix when height = 7.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 9.233726263046265 seconds ---\n", - "[[ 40. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 64. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 40. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 304. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 228. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 246.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.5894455909729004 seconds ---\n", + "[[ 40. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 64. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 40. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 304. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 228. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
246.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 991/1000 [00:01<00:00, 663.55it/s]\n", " Mean performance on train set: 0.691516\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 15.629476\n", "With standard deviation: 3.865387\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 627.59it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 8 ---#\n", + " #--- calculating kernel matrix when height = 8.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 10.530746698379517 seconds ---\n", - "[[ 45. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 72. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 45. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 333. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 243. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 257.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.081295967102051 seconds ---\n", + "[[ 45. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 72. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 45. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 333. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 243. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
257.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 96%|█████████▌| 961/1000 [00:01<00:00, 601.33it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 16.214369\n", "With standard deviation: 3.928756\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 603.90it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 9 ---#\n", + " #--- calculating kernel matrix when height = 9.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 11.556112289428711 seconds ---\n", - "[[ 50. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 80. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 50. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 362. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 258. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 268.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.497286796569824 seconds ---\n", + "[[ 50. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 80. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 50. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 362. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 258. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
268.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 93%|█████████▎| 931/1000 [00:01<00:00, 511.55it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 16.725744\n", "With standard deviation: 3.993095\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 550.66it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when height = 10 ---#\n", + " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 12.740600109100342 seconds ---\n", - "[[ 55. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 88. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 55. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 391. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 273. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 279.]]\n", - "\n", - " Saving kernel matrix to file...\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.984841585159302 seconds ---\n", + "[[ 55. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 88. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 55. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 391. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 273. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
279.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▍| 942/1000 [00:01<00:00, 708.78it/s]\n", " Mean performance on train set: 0.691516\n", "With standard deviation: 0.564621\n", "\n", " Mean performance on test set: 17.186401\n", "With standard deviation: 4.056724\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 711.43it/s]\n", "\n", "\n", - " height RMSE_test std_test RMSE_train std_train k_time\n", + " height rmse_test std_test rmse_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- --------\n", - " 0 15.6859 4.1392 17.6816 0.713183 1.09184\n", - " 1 7.55046 2.33179 6.27001 0.654734 2.15531\n", - " 2 9.72847 2.05767 4.45068 0.882129 3.36866\n", - " 3 11.2961 2.79994 2.27059 0.481516 4.48007\n", - " 4 12.8083 3.44694 1.07403 0.637823 6.05264\n", - " 5 14.0179 3.67504 0.700602 0.57264 7.73137\n", - " 6 14.9184 3.80535 0.691515 0.56462 8.00547\n", - " 7 15.6295 3.86539 0.691516 0.56462 9.23373\n", - " 8 16.2144 3.92876 0.691515 0.56462 10.5307\n", - " 9 16.7257 3.9931 0.691515 0.56462 11.5561\n", - " 10 17.1864 4.05672 0.691516 0.564621 12.7406\n" + " 0 15.6859 4.1392 17.6816 0.713183 0.364655\n", + " 1 7.55046 2.33179 6.27001 0.654734 0.753551\n", + " 2 9.72847 2.05767 4.45068 0.882129 1.32783\n", + " 3 11.2961 2.79994 2.27059 0.481516 1.76537\n", + " 4 12.8083 3.44694 1.07403 0.637823 2.2822\n", + " 5 14.0179 3.67504 0.700602 0.57264 2.70693\n", + " 6 14.9184 3.80535 0.691515 0.56462 3.1141\n", + " 7 15.6295 3.86539 0.691516 0.56462 3.58945\n", + " 8 16.2144 3.92876 0.691515 0.56462 4.0813\n", + " 9 16.7257 3.9931 0.691515 0.56462 4.49729\n", + " 10 17.1864 4.05672 0.691516 0.564621 4.98484\n" ] } ], "source": [ + "# wl subtree kernel\n", "%load_ext line_profiler\n", "\n", "import numpy as np\n", @@ -319,480 +335,744 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], - "source": [ - "# results\n", 
- "\n", - "# with y normalization\n", - " height RMSE_test std_test RMSE_train std_train k_time\n", - "-------- ----------- ---------- ------------ ----------- --------\n", - " 0 36.2108 7.33179 38.6059 1.57064 0.379475\n", - " 1 9.00098 6.37145 6.76379 1.96568 0.844898\n", - " 2 19.8113 4.04911 5.28757 1.81899 1.35308\n", - " 3 25.0455 4.94276 2.3274 0.805733 1.81136\n", - " 4 28.2255 6.5212 0.85156 0.423465 2.23098\n", - " 5 30.6354 6.73647 3.35947 8.17561 2.71575\n", - " 6 32.1027 6.85601 3.54105 8.71922 3.11459\n", - " 7 32.9709 6.89606 6.94372 9.94045 3.55571\n", - " 8 33.5112 6.90753 6.97339 9.76975 3.79657\n", - " 9 33.8502 6.91427 11.8345 11.6213 4.41555\n", - " 10 34.0963 6.93115 11.4257 11.2624 4.94888\n", - "\n", - "# without y normalization\n", - " height RMSE_test std_test RMSE_train std_train k_time\n", - "-------- ----------- ---------- ------------ ----------- --------\n", - " 0 15.6859 4.1392 17.6816 0.713183 0.360443\n", - " 1 7.55046 2.33179 6.27001 0.654734 0.837389\n", - " 2 9.72847 2.05767 4.45068 0.882129 1.25317\n", - " 3 11.2961 2.79994 2.27059 0.481516 1.79971\n", - " 4 12.8083 3.44694 1.07403 0.637823 2.35346\n", - " 5 14.0179 3.67504 0.700602 0.57264 2.78285\n", - " 6 14.9184 3.80535 0.691515 0.56462 3.20764\n", - " 7 15.6295 3.86539 0.691516 0.56462 3.71648\n", - " 8 16.2144 3.92876 0.691515 0.56462 3.99213\n", - " 9 16.7257 3.9931 0.691515 0.56462 4.26315\n", - " 10 17.1864 4.05672 0.691516 0.564621 5.00918" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "scrolled": true - }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in 
training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. \n", + " --- This is a regression problem ---\n", "\n", - "@references\n", - " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\n", + " #--- calculating kernel matrix when height = 0.0 ---#\n", "\n", + " Loading dataset from file...\n", "\n", - " #--- calculating kernel matrix when subtree height = 0 ---#\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " Loading dataset from file...\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 13.504083633422852 seconds ---\n", + "[[ 3. 1. 3. ... 1. 1. 1.]\n", + " [ 1. 6. 1. ... 0. 0. 3.]\n", + " [ 3. 1. 3. ... 1. 1. 1.]\n", + " ...\n", + " [ 1. 0. 1. ... 55. 21. 7.]\n", + " [ 1. 0. 1. ... 21. 55. 7.]\n", + " [ 1. 3. 1. ... 7. 7. 55.]]\n", "\n", - " --- This is a regression problem ---\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 773.79it/s]\n", + " Mean performance on train set: 28.360361\n", + "With standard deviation: 1.357183\n", "\n", - " Calculating kernel matrix, this could take a while...\n", + " Mean performance on test set: 35.191954\n", + "With standard deviation: 4.495767\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 743.82it/s]\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3920705318450928 seconds ---\n", - "[[ 5. 6. 4. ..., 20. 20. 20.]\n", - " [ 6. 8. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 5. ..., 21. 21. 21.]\n", - " ..., \n", - " [ 20. 20. 21. ..., 101. 101. 
101.]\n", - " [ 20. 20. 21. ..., 101. 101. 101.]\n", - " [ 20. 20. 21. ..., 101. 101. 101.]]\n", "\n", - " Saving kernel matrix to file...\n", + " #--- calculating kernel matrix when height = 1.0 ---#\n", "\n", - " Mean performance on train set: 17.681582\n", - "With standard deviation: 0.713183\n", + " Loading dataset from file...\n", "\n", - " Mean performance on test set: 15.685879\n", - "With standard deviation: 4.139197\n", + " Calculating kernel matrix, this could take a while...\n", "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 26.82917618751526 seconds ---\n", + "[[ 6. 2. 6. ... 2. 2. 2.]\n", + " [ 2. 12. 2. ... 0. 0. 6.]\n", + " [ 6. 2. 6. ... 2. 2. 2.]\n", + " ...\n", + " [ 2. 0. 2. ... 110. 42. 14.]\n", + " [ 2. 0. 2. ... 42. 110. 14.]\n", + " [ 2. 6. 2. ... 14. 14. 110.]]\n", "\n", - " #--- calculating kernel matrix when subtree height = 1 ---#\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 751.78it/s]\n", + " Mean performance on train set: 27.933534\n", + "With standard deviation: 1.448359\n", "\n", - " Loading dataset from file...\n", + " Mean performance on test set: 35.180815\n", + "With standard deviation: 4.500453\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 744.44it/s]\n", "\n", - " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", + " #--- calculating kernel matrix when height = 2.0 ---#\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.8578901290893555 seconds ---\n", - "[[ 10. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 16. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 10. ..., 22. 22. 24.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 24. ..., 122. 122. 
154.]]\n", + " Loading dataset from file...\n", "\n", - " Saving kernel matrix to file...\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " Mean performance on train set: 6.270014\n", - "With standard deviation: 0.654734\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 40.235626220703125 seconds ---\n", + "[[ 9. 3. 9. ... 3. 3. 3.]\n", + " [ 3. 18. 3. ... 0. 0. 9.]\n", + " [ 9. 3. 9. ... 3. 3. 3.]\n", + " ...\n", + " [ 3. 0. 3. ... 165. 63. 21.]\n", + " [ 3. 0. 3. ... 63. 165. 21.]\n", + " [ 3. 9. 3. ... 21. 21. 165.]]\n", "\n", - " Mean performance on test set: 7.550458\n", - "With standard deviation: 2.331786\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 694.10it/s]\n", + " Mean performance on train set: 28.111311\n", + "With standard deviation: 1.508915\n", "\n", + " Mean performance on test set: 35.163150\n", + "With standard deviation: 4.502054\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 695.02it/s]\n", "\n", - " #--- calculating kernel matrix when subtree height = 2 ---#\n", "\n", - " Loading dataset from file...\n", + " #--- calculating kernel matrix when height = 3.0 ---#\n", "\n", - " --- This is a regression problem ---\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.264050006866455 seconds ---\n", - "[[ 15. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 24. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 15. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 159. 151. 124.]\n", - " [ 20. 20. 22. ..., 151. 153. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 185.]]\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 54.67040753364563 seconds ---\n", + "[[ 12. 4. 12. ... 4. 4. 4.]\n", + " [ 4. 24. 4. ... 0. 0. 12.]\n", + " [ 12. 4. 12. ... 4. 4. 4.]\n", + " ...\n", + " [ 4. 0. 4. ... 
220. 84. 28.]\n", + " [ 4. 0. 4. ... 84. 220. 28.]\n", + " [ 4. 12. 4. ... 28. 28. 220.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 748.03it/s]\n", + " Mean performance on train set: 28.390274\n", + "With standard deviation: 1.365711\n", "\n", - " Mean performance on train set: 4.450682\n", - "With standard deviation: 0.882129\n", - "\n", - " Mean performance on test set: 9.728466\n", - "With standard deviation: 2.057669\n", + " Mean performance on test set: 35.194634\n", + "With standard deviation: 4.498007\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 726.68it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when subtree height = 3 ---#\n", + " #--- calculating kernel matrix when height = 4.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", - "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.731236219406128 seconds ---\n", - "[[ 20. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 32. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 20. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 188. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 168. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 202.]]\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 67.15217232704163 seconds ---\n", + "[[ 15. 5. 15. ... 5. 5. 5.]\n", + " [ 5. 30. 5. ... 0. 0. 15.]\n", + " [ 15. 5. 15. ... 5. 5. 5.]\n", + " ...\n", + " [ 5. 0. 5. ... 275. 105. 35.]\n", + " [ 5. 0. 5. ... 105. 275. 35.]\n", + " [ 5. 15. 5. ... 35. 35. 
275.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 950/1000 [00:01<00:00, 737.07it/s]\n", + " Mean performance on train set: 27.974611\n", + "With standard deviation: 1.462223\n", "\n", - " Mean performance on train set: 2.270586\n", - "With standard deviation: 0.481516\n", - "\n", - " Mean performance on test set: 11.296110\n", - "With standard deviation: 2.799944\n", + " Mean performance on test set: 35.175314\n", + "With standard deviation: 4.501113\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 719.71it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when subtree height = 4 ---#\n", + " #--- calculating kernel matrix when height = 5.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", - "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.1112847328186035 seconds ---\n", - "[[ 25. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 40. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 25. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 217. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 183. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 213.]]\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 80.08806300163269 seconds ---\n", + "[[ 18. 6. 18. ... 6. 6. 6.]\n", + " [ 6. 36. 6. ... 0. 0. 18.]\n", + " [ 18. 6. 18. ... 6. 6. 6.]\n", + " ...\n", + " [ 6. 0. 6. ... 330. 126. 42.]\n", + " [ 6. 0. 6. ... 126. 330. 42.]\n", + " [ 6. 18. 6. ... 42. 42. 
330.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 735.71it/s]\n", + " Mean performance on train set: 28.018415\n", + "With standard deviation: 1.455644\n", "\n", - " Mean performance on train set: 1.074035\n", - "With standard deviation: 0.637823\n", + " Mean performance on test set: 35.199713\n", + "With standard deviation: 4.507104\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 738.55it/s]\n", "\n", - " Mean performance on test set: 12.808303\n", - "With standard deviation: 3.446939\n", "\n", + " #--- calculating kernel matrix when height = 6.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 92.19254112243652 seconds ---\n", + "[[ 21. 7. 21. ... 7. 7. 7.]\n", + " [ 7. 42. 7. ... 0. 0. 21.]\n", + " [ 21. 7. 21. ... 7. 7. 7.]\n", + " ...\n", + " [ 7. 0. 7. ... 385. 147. 49.]\n", + " [ 7. 0. 7. ... 147. 385. 49.]\n", + " [ 7. 21. 7. ... 49. 49. 385.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 721.42it/s]\n", + " Mean performance on train set: 28.373079\n", + "With standard deviation: 1.600565\n", "\n", - " #--- calculating kernel matrix when subtree height = 5 ---#\n", + " Mean performance on test set: 35.164471\n", + "With standard deviation: 4.498487\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 727.58it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 7.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 105.81170415878296 seconds ---\n", + "[[ 24. 8. 24. ... 8. 8. 
8.]\n", + " [ 8. 48. 8. ... 0. 0. 24.]\n", + " [ 24. 8. 24. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 0. 8. ... 440. 168. 56.]\n", + " [ 8. 0. 8. ... 168. 440. 56.]\n", + " [ 8. 24. 8. ... 56. 56. 440.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 968/1000 [00:01<00:00, 739.67it/s]\n", + " Mean performance on train set: 27.960421\n", + "With standard deviation: 1.457425\n", + "\n", + " Mean performance on test set: 35.177115\n", + "With standard deviation: 4.500904\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 733.61it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 8.0 ---#\n", + "\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.4751319885253906 seconds ---\n", - "[[ 30. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 48. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 30. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 246. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 198. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 224.]]\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 119.0216612815857 seconds ---\n", + "[[ 27. 9. 27. ... 9. 9. 9.]\n", + " [ 9. 54. 9. ... 0. 0. 27.]\n", + " [ 27. 9. 27. ... 9. 9. 9.]\n", + " ...\n", + " [ 9. 0. 9. ... 495. 189. 63.]\n", + " [ 9. 0. 9. ... 189. 495. 63.]\n", + " [ 9. 27. 9. ... 63. 63. 
495.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 93%|█████████▎| 931/1000 [00:01<00:00, 752.10it/s]\n", + " Mean performance on train set: 28.199059\n", + "With standard deviation: 1.514897\n", "\n", - " Mean performance on train set: 0.700602\n", - "With standard deviation: 0.572640\n", + " Mean performance on test set: 35.196848\n", + "With standard deviation: 4.505256\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 768.54it/s]\n", "\n", - " Mean performance on test set: 14.017923\n", - "With standard deviation: 3.675042\n", "\n", + " #--- calculating kernel matrix when height = 9.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 131.22810459136963 seconds ---\n", + "[[ 30. 10. 30. ... 10. 10. 10.]\n", + " [ 10. 60. 10. ... 0. 0. 30.]\n", + " [ 30. 10. 30. ... 10. 10. 10.]\n", + " ...\n", + " [ 10. 0. 10. ... 550. 210. 70.]\n", + " [ 10. 0. 10. ... 210. 550. 70.]\n", + " [ 10. 30. 10. ... 70. 70. 
550.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 93%|█████████▎| 932/1000 [00:01<00:00, 763.55it/s]\n", + " Mean performance on train set: 28.266520\n", + "With standard deviation: 1.307686\n", + "\n", + " Mean performance on test set: 35.195635\n", + "With standard deviation: 4.501972\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 764.12it/s]\n", "\n", - " #--- calculating kernel matrix when subtree height = 6 ---#\n", + "\n", + " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 144.96362161636353 seconds ---\n", + "[[ 33. 11. 33. ... 11. 11. 11.]\n", + " [ 11. 66. 11. ... 0. 0. 33.]\n", + " [ 33. 11. 33. ... 11. 11. 11.]\n", + " ...\n", + " [ 11. 0. 11. ... 605. 231. 77.]\n", + " [ 11. 0. 11. ... 231. 605. 77.]\n", + " [ 11. 33. 11. ... 77. 77. 
605.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|█████████▉| 996/1000 [00:01<00:00, 820.73it/s]\n", + " Mean performance on train set: 28.416280\n", + "With standard deviation: 1.615957\n", + "\n", + " Mean performance on test set: 35.167588\n", + "With standard deviation: 4.497227\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 822.53it/s]\n", + "\n", + "\n", + " height rmse_test std_test rmse_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 35.192 4.49577 28.3604 1.35718 13.5041\n", + " 1 35.1808 4.50045 27.9335 1.44836 26.8292\n", + " 2 35.1632 4.50205 28.1113 1.50891 40.2356\n", + " 3 35.1946 4.49801 28.3903 1.36571 54.6704\n", + " 4 35.1753 4.50111 27.9746 1.46222 67.1522\n", + " 5 35.1997 4.5071 28.0184 1.45564 80.0881\n", + " 6 35.1645 4.49849 28.3731 1.60057 92.1925\n", + " 7 35.1771 4.5009 27.9604 1.45742 105.812\n", + " 8 35.1968 4.50526 28.1991 1.5149 119.022\n", + " 9 35.1956 4.50197 28.2665 1.30769 131.228\n", + " 10 35.1676 4.49723 28.4163 1.61596 144.964\n" + ] + } + ], + "source": [ + "# WL sp kernel\n", + "%load_ext line_profiler\n", + "\n", + "import numpy as np\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'sp')\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "\n", + "# %lprun -f _wl_subtreekernel_do \\\n", + "# kernel_train_test(datafile, kernel_file_path, 
weisfeilerlehmankernel, kernel_para, \\\n", + "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The line_profiler extension is already loaded. To reload it, use:\n", + " %reload_ext line_profiler\n", + "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.8712213039398193 seconds ---\n", - "[[ 35. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 56. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 35. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 275. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 213. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 235.]]\n", + " #--- calculating kernel matrix when height = 0.0 ---#\n", "\n", - " Saving kernel matrix to file...\n", + " Loading dataset from file...\n", "\n", - " Mean performance on train set: 0.691515\n", - "With standard deviation: 0.564620\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " Mean performance on test set: 14.918434\n", - "With standard deviation: 3.805352\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 0.8530018329620361 seconds ---\n", + "[[ 2. 1. 2. ... 0. 0. 1.]\n", + " [ 1. 3. 1. ... 0. 0. 2.]\n", + " [ 2. 1. 2. ... 0. 0. 1.]\n", + " ...\n", + " [ 0. 0. 0. ... 10. 7. 0.]\n", + " [ 0. 0. 0. ... 7. 10. 1.]\n", + " [ 1. 2. 1. ... 0. 1. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 947/1000 [00:01<00:00, 719.29it/s]\n", + " Mean performance on train set: 29.997498\n", + "With standard deviation: 0.902340\n", + "\n", + " Mean performance on test set: 33.407740\n", + "With standard deviation: 4.732717\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 653.54it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when subtree height = 7 ---#\n", + " #--- calculating kernel matrix when height = 1.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 1.717505931854248 seconds ---\n", + "[[ 4. 2. 4. ... 0. 0. 2.]\n", + " [ 2. 6. 2. ... 0. 0. 4.]\n", + " [ 4. 2. 4. ... 0. 0. 2.]\n", + " ...\n", + " [ 0. 0. 0. ... 20. 14. 0.]\n", + " [ 0. 0. 0. ... 14. 20. 2.]\n", + " [ 2. 4. 2. ... 0. 2. 20.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 96%|█████████▌| 956/1000 [00:01<00:00, 721.27it/s]\n", + " Mean performance on train set: 30.160338\n", + "With standard deviation: 1.094235\n", + "\n", + " Mean performance on test set: 33.423458\n", + "With standard deviation: 4.721311\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 723.53it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 2.0 ---#\n", + "\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.554422378540039 seconds ---\n", - "[[ 40. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 64. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 40. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 304. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 228. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 
246.]]\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 2.6603214740753174 seconds ---\n", + "[[ 6. 3. 6. ... 0. 0. 3.]\n", + " [ 3. 9. 3. ... 0. 0. 6.]\n", + " [ 6. 3. 6. ... 0. 0. 3.]\n", + " ...\n", + " [ 0. 0. 0. ... 30. 21. 0.]\n", + " [ 0. 0. 0. ... 21. 30. 3.]\n", + " [ 3. 6. 3. ... 0. 3. 30.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▍| 944/1000 [00:01<00:00, 650.98it/s]\n", + " Mean performance on train set: 29.928570\n", + "With standard deviation: 0.787941\n", "\n", - " Mean performance on train set: 0.691516\n", - "With standard deviation: 0.564620\n", + " Mean performance on test set: 33.433014\n", + "With standard deviation: 4.724408\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 688.71it/s]\n", "\n", - " Mean performance on test set: 15.629476\n", - "With standard deviation: 3.865387\n", + "\n", + " #--- calculating kernel matrix when height = 3.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 3.477631092071533 seconds ---\n", + "[[ 8. 4. 8. ... 0. 0. 4.]\n", + " [ 4. 12. 4. ... 0. 0. 8.]\n", + " [ 8. 4. 8. ... 0. 0. 4.]\n", + " ...\n", + " [ 0. 0. 0. ... 40. 28. 0.]\n", + " [ 0. 0. 0. ... 28. 40. 4.]\n", + " [ 4. 8. 4. ... 0. 4. 
40.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 725.15it/s]\n", + " Mean performance on train set: 30.011409\n", + "With standard deviation: 0.909674\n", + "\n", + " Mean performance on test set: 33.407319\n", + "With standard deviation: 4.732434\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 720.71it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when subtree height = 8 ---#\n", + " #--- calculating kernel matrix when height = 4.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 4.5436692237854 seconds ---\n", + "[[10. 5. 10. ... 0. 0. 5.]\n", + " [ 5. 15. 5. ... 0. 0. 10.]\n", + " [10. 5. 10. ... 0. 0. 5.]\n", + " ...\n", + " [ 0. 0. 0. ... 50. 35. 0.]\n", + " [ 0. 0. 0. ... 35. 50. 5.]\n", + " [ 5. 10. 5. ... 0. 5. 50.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 568.04it/s]\n", + " Mean performance on train set: 30.184162\n", + "With standard deviation: 1.108902\n", + "\n", + " Mean performance on test set: 33.425625\n", + "With standard deviation: 4.721660\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 564.24it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 5.0 ---#\n", + "\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.8757314682006836 seconds ---\n", - "[[ 45. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 72. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 45. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 333. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 243. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 
257.]]\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 5.6617820262908936 seconds ---\n", + "[[12. 6. 12. ... 0. 0. 6.]\n", + " [ 6. 18. 6. ... 0. 0. 12.]\n", + " [12. 6. 12. ... 0. 0. 6.]\n", + " ...\n", + " [ 0. 0. 0. ... 60. 42. 0.]\n", + " [ 0. 0. 0. ... 42. 60. 6.]\n", + " [ 6. 12. 6. ... 0. 6. 60.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 993/1000 [00:01<00:00, 519.25it/s]\n", + " Mean performance on train set: 30.041068\n", + "With standard deviation: 1.018451\n", "\n", - " Mean performance on train set: 0.691515\n", - "With standard deviation: 0.564620\n", + " Mean performance on test set: 33.406717\n", + "With standard deviation: 4.726409\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 548.91it/s]\n", "\n", - " Mean performance on test set: 16.214369\n", - "With standard deviation: 3.928756\n", "\n", + " #--- calculating kernel matrix when height = 6.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 6.148027420043945 seconds ---\n", + "[[14. 7. 14. ... 0. 0. 7.]\n", + " [ 7. 21. 7. ... 0. 0. 14.]\n", + " [14. 7. 14. ... 0. 0. 7.]\n", + " ...\n", + " [ 0. 0. 0. ... 70. 49. 0.]\n", + " [ 0. 0. 0. ... 49. 70. 7.]\n", + " [ 7. 14. 7. ... 0. 7. 
70.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 498.31it/s]\n", + " Mean performance on train set: 29.905596\n", + "With standard deviation: 0.782179\n", "\n", - " #--- calculating kernel matrix when subtree height = 9 ---#\n", + " Mean performance on test set: 33.418992\n", + "With standard deviation: 4.730753\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 534.86it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 7.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 7.603543519973755 seconds ---\n", + "[[16. 8. 16. ... 0. 0. 8.]\n", + " [ 8. 24. 8. ... 0. 0. 16.]\n", + " [16. 8. 16. ... 0. 0. 8.]\n", + " ...\n", + " [ 0. 0. 0. ... 80. 56. 0.]\n", + " [ 0. 0. 0. ... 56. 80. 8.]\n", + " [ 8. 16. 8. ... 0. 8. 80.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 953/1000 [00:01<00:00, 586.15it/s]\n", + " Mean performance on train set: 30.175921\n", + "With standard deviation: 1.103820\n", + "\n", + " Mean performance on test set: 33.424820\n", + "With standard deviation: 4.721550\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 546.00it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 8.0 ---#\n", + "\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.205373764038086 seconds ---\n", - "[[ 50. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 80. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 50. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 362. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 258. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 
268.]]\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 7.972221612930298 seconds ---\n", + "[[18. 9. 18. ... 0. 0. 9.]\n", + " [ 9. 27. 9. ... 0. 0. 18.]\n", + " [18. 9. 18. ... 0. 0. 9.]\n", + " ...\n", + " [ 0. 0. 0. ... 90. 63. 0.]\n", + " [ 0. 0. 0. ... 63. 90. 9.]\n", + " [ 9. 18. 9. ... 0. 9. 90.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 490.30it/s]\n", + " Mean performance on train set: 30.136537\n", + "With standard deviation: 1.074854\n", "\n", - " Mean performance on train set: 0.691515\n", - "With standard deviation: 0.564620\n", + " Mean performance on test set: 33.412196\n", + "With standard deviation: 4.715539\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 536.66it/s]\n", "\n", - " Mean performance on test set: 16.725744\n", - "With standard deviation: 3.993095\n", "\n", + " #--- calculating kernel matrix when height = 9.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " #--- calculating kernel matrix when subtree height = 10 ---#\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 9.070842504501343 seconds ---\n", + "[[ 20. 10. 20. ... 0. 0. 10.]\n", + " [ 10. 30. 10. ... 0. 0. 20.]\n", + " [ 20. 10. 20. ... 0. 0. 10.]\n", + " ...\n", + " [ 0. 0. 0. ... 100. 70. 0.]\n", + " [ 0. 0. 0. ... 70. 100. 10.]\n", + " [ 10. 20. 10. ... 0. 10. 
100.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 527.13it/s]\n", + " Mean performance on train set: 30.032887\n", + "With standard deviation: 0.921065\n", + "\n", + " Mean performance on test set: 33.407050\n", + "With standard deviation: 4.731928\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 600.62it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " Calculating kernel matrix, this could take a while...\n" + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 10.02536916732788 seconds ---\n", + "[[ 22. 11. 22. ... 0. 0. 11.]\n", + " [ 11. 33. 11. ... 0. 0. 22.]\n", + " [ 22. 11. 22. ... 0. 0. 11.]\n", + " ...\n", + " [ 0. 0. 0. ... 110. 77. 0.]\n", + " [ 0. 0. 0. ... 77. 110. 11.]\n", + " [ 11. 22. 11. ... 0. 11. 110.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.737298250198364 seconds ---\n", - "[[ 55. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 88. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 55. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 391. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 273. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 
279.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on train set: 0.691516\n", - "With standard deviation: 0.564621\n", - "\n", - " Mean performance on test set: 17.186401\n", - "With standard deviation: 4.056724\n", - "\n", - "\n", - " height RMSE_test std_test RMSE_train std_train k_time\n", - "-------- ----------- ---------- ------------ ----------- --------\n", - " 0 15.6859 4.1392 17.6816 0.713183 0.392071\n", - " 1 7.55046 2.33179 6.27001 0.654734 0.85789\n", - " 2 9.72847 2.05767 4.45068 0.882129 1.26405\n", - " 3 11.2961 2.79994 2.27059 0.481516 1.73124\n", - " 4 12.8083 3.44694 1.07403 0.637823 2.11128\n", - " 5 14.0179 3.67504 0.700602 0.57264 2.47513\n", - " 6 14.9184 3.80535 0.691515 0.56462 2.87122\n", - " 7 15.6295 3.86539 0.691516 0.56462 3.55442\n", - " 8 16.2144 3.92876 0.691515 0.56462 3.87573\n", - " 9 16.7257 3.9931 0.691515 0.56462 4.20537\n", - " 10 17.1864 4.05672 0.691516 0.564621 4.7373\n" + "calculate performance: 97%|█████████▋| 970/1000 [00:01<00:00, 694.38it/s]\n", + " Mean performance on train set: 29.924232\n", + "With standard deviation: 0.790843\n", + "\n", + " Mean performance on test set: 33.416469\n", + "With standard deviation: 4.731694\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 678.72it/s]\n", + "\n", + "\n", + " height rmse_test std_test rmse_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- ---------\n", + " 0 33.4077 4.73272 29.9975 0.90234 0.853002\n", + " 1 33.4235 4.72131 30.1603 1.09423 1.71751\n", + " 2 33.433 4.72441 29.9286 0.787941 2.66032\n", + " 3 33.4073 4.73243 30.0114 0.909674 3.47763\n", + " 4 33.4256 4.72166 30.1842 1.1089 4.54367\n", + " 5 33.4067 4.72641 30.0411 1.01845 5.66178\n", + " 6 33.419 4.73075 29.9056 0.782179 6.14803\n", + " 7 33.4248 4.72155 30.1759 1.10382 7.60354\n", + " 8 33.4122 4.71554 30.1365 1.07485 7.97222\n", + " 9 33.4071 4.73193 30.0329 0.921065 9.07084\n", + " 10 33.4165 4.73169 29.9242 
0.790843 10.0254\n" ] } ], "source": [ - "# test of WL subtree kernel\n", - "\n", - "\"\"\"\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. \n", - "\n", - "@references\n", - " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\"\"\"\n", - "\n", - "print(__doc__)\n", + "# WL edge kernel\n", + "%load_ext line_profiler\n", "\n", + "import numpy as np\n", "import sys\n", - "import os\n", - "import pathlib\n", - "from collections import OrderedDict\n", "sys.path.insert(0, \"../\")\n", - "from tabulate import tabulate\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", - "from pygraph.utils.graphfiles import loadDataset\n", - "from pygraph.utils.utils import split_train_test\n", - "\n", - "train_means_list = []\n", - "train_stds_list = []\n", - "test_means_list = []\n", - "test_stds_list = []\n", - "kernel_time_list = []\n", - "\n", - "for height in np.linspace(0, 10, 11):\n", - " print('\\n\\n #--- calculating kernel matrix when subtree height = %d ---#' % height)\n", - "\n", - " print('\\n Loading dataset from file...')\n", - " dataset, y = 
loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", - " y = np.array(y)\n", - "# print(y)\n", - "\n", - " # setup the parameters\n", - " model_type = 'regression' # Regression or classification problem\n", - " print('\\n --- This is a %s problem ---' % model_type)\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", "\n", - "# datasize = len(dataset)\n", - " trials = 100 # Trials for hyperparameters random search\n", - " splits = 10 # Number of splits of the data\n", - " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", - " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'edge')\n", "\n", - " # set the output path\n", - " kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", - " if not os.path.exists(kernel_file_path):\n", - " os.makedirs(kernel_file_path)\n", + "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", "\n", - " \"\"\"\n", - " - Here starts the main program\n", - " - First we permute the data, then for each split we evaluate corresponding performances\n", - " - In the end, the performances are averaged over the test sets\n", - " \"\"\"\n", + "# %lprun -f _wl_subtreekernel_do \\\n", + "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "# results\n", "\n", - " # save kernel matrices to files / read kernel matrices from files\n", - " kernel_file = kernel_file_path + 'km.ds'\n", - " path = pathlib.Path(kernel_file)\n", - " # get train set kernel matrix\n", - " if path.is_file():\n", - " print('\\n Loading the kernel matrix from file...')\n", - " Kmatrix = np.loadtxt(kernel_file)# results\n", - " print(Kmatrix)\n", - " else:\n", - " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix, run_time = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height))\n", - " kernel_time_list.append(run_time)\n", - " print(Kmatrix)\n", - " print('\\n Saving kernel matrix to file...')\n", - " # np.savetxt(kernel_file, Kmatrix)\n", + "# subtree with y normalization\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 36.2108 7.33179 38.6059 1.57064 0.379475\n", + " 1 9.00098 6.37145 6.76379 1.96568 0.844898\n", + " 2 19.8113 4.04911 5.28757 1.81899 1.35308\n", + " 3 25.0455 4.94276 2.3274 0.805733 1.81136\n", + " 4 28.2255 6.5212 0.85156 0.423465 2.23098\n", + " 5 30.6354 6.73647 3.35947 8.17561 2.71575\n", + " 6 32.1027 6.85601 3.54105 8.71922 3.11459\n", + " 7 32.9709 6.89606 6.94372 9.94045 3.55571\n", + " 8 33.5112 6.90753 6.97339 9.76975 3.79657\n", + " 9 33.8502 6.91427 11.8345 11.6213 4.41555\n", + " 10 34.0963 6.93115 11.4257 11.2624 4.94888\n", "\n", - " train_mean, train_std, test_mean, test_std = \\\n", - " split_train_test(Kmatrix, y, alpha_grid, C_grid, splits, trials, model_type, normalize = False)\n", - " \n", - " train_means_list.append(train_mean)\n", - " train_stds_list.append(train_std)\n", - " test_means_list.append(test_mean)\n", - " test_stds_list.append(test_std)\n", + "# subtree without y normalization\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- 
--------\n", + " 0 15.6859 4.1392 17.6816 0.713183 0.360443\n", + " 1 7.55046 2.33179 6.27001 0.654734 0.837389\n", + " 2 9.72847 2.05767 4.45068 0.882129 1.25317\n", + " 3 11.2961 2.79994 2.27059 0.481516 1.79971\n", + " 4 12.8083 3.44694 1.07403 0.637823 2.35346\n", + " 5 14.0179 3.67504 0.700602 0.57264 2.78285\n", + " 6 14.9184 3.80535 0.691515 0.56462 3.20764\n", + " 7 15.6295 3.86539 0.691516 0.56462 3.71648\n", + " 8 16.2144 3.92876 0.691515 0.56462 3.99213\n", + " 9 16.7257 3.9931 0.691515 0.56462 4.26315\n", + " 10 17.1864 4.05672 0.691516 0.564621 5.00918\n", " \n", - "print('\\n') \n", - "table_dict = {'height': np.linspace(0, 10, 11), 'RMSE_test': test_means_list, 'std_test': test_stds_list, \\\n", - " 'RMSE_train': train_means_list, 'std_train': train_stds_list, 'k_time': kernel_time_list}\n", - "keyorder = ['height', 'RMSE_test', 'std_test', 'RMSE_train', 'std_train', 'k_time']\n", - "print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys'))" + "# sp\n", + " height rmse_test std_test rmse_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 35.192 4.49577 28.3604 1.35718 13.5041\n", + " 1 35.1808 4.50045 27.9335 1.44836 26.8292\n", + " 2 35.1632 4.50205 28.1113 1.50891 40.2356\n", + " 3 35.1946 4.49801 28.3903 1.36571 54.6704\n", + " 4 35.1753 4.50111 27.9746 1.46222 67.1522\n", + " 5 35.1997 4.5071 28.0184 1.45564 80.0881\n", + " 6 35.1645 4.49849 28.3731 1.60057 92.1925\n", + " 7 35.1771 4.5009 27.9604 1.45742 105.812\n", + " 8 35.1968 4.50526 28.1991 1.5149 119.022\n", + " 9 35.1956 4.50197 28.2665 1.30769 131.228\n", + " 10 35.1676 4.49723 28.4163 1.61596 144.964\n", + " \n", + "# path\n", + " height rmse_test std_test rmse_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- ---------\n", + " 0 33.4077 4.73272 29.9975 0.90234 0.853002\n", + " 1 33.4235 4.72131 30.1603 1.09423 1.71751\n", + " 2 33.433 4.72441 
29.9286 0.787941 2.66032\n", + " 3 33.4073 4.73243 30.0114 0.909674 3.47763\n", + " 4 33.4256 4.72166 30.1842 1.1089 4.54367\n", + " 5 33.4067 4.72641 30.0411 1.01845 5.66178\n", + " 6 33.419 4.73075 29.9056 0.782179 6.14803\n", + " 7 33.4248 4.72155 30.1759 1.10382 7.60354\n", + " 8 33.4122 4.71554 30.1365 1.07485 7.97222\n", + " 9 33.4071 4.73193 30.0329 0.921065 9.07084\n", + " 10 33.4165 4.73169 29.9242 0.790843 10.0254" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": { "scrolled": true }, @@ -802,117 +1082,25 @@ "output_type": "stream", "text": [ "{'O', 'C'}\n", - "{'O', 'C'}\n", - "--- shortest path kernel built in 0.0002582073211669922 seconds ---\n", - "3\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'O'})]\n", - " -> \n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CO'}), (3, {'label': 'CCCO'}), (4, {'label': 'OCC'})]\n", - " -> \n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '3'}), (3, {'label': '1'}), (4, {'label': '2'})]\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'C'}), (5, {'label': 'C'}), 
(6, {'label': 'O'})]\n", - " -> \n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CC'}), (3, {'label': 'CO'}), (4, {'label': 'CCCC'}), (5, {'label': 'CCCO'}), (6, {'label': 'OCC'})]\n", - " -> \n" + "{'O', 'C'}\n" ] }, { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '0'}), (3, {'label': '3'}), (4, {'label': '4'}), (5, {'label': '1'}), (6, {'label': '2'})]\n", - "--- shortest path kernel built in 0.00026607513427734375 seconds ---\n", - "6\n" + "ename": "TypeError", + "evalue": "'int' object is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabelset1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabelset2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(edge_weight, *args)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mGn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mG\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m]\u001b[0m \u001b[0;31m# get shortest path graphs of Gn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mGn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m 
\u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mG\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m]\u001b[0m \u001b[0;31m# get shortest path graphs of Gn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/utils.py\u001b[0m in \u001b[0;36mgetSPGraph\u001b[0;34m(G, edge_weight)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0mBorgwardt\u001b[0m \u001b[0mKM\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKriegel\u001b[0m \u001b[0mHP\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mShortest\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpath\u001b[0m \u001b[0mkernels\u001b[0m \u001b[0mon\u001b[0m \u001b[0mgraphs\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mInData\u001b[0m \u001b[0mMining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFifth\u001b[0m \u001b[0mIEEE\u001b[0m \u001b[0mInternational\u001b[0m \u001b[0mConference\u001b[0m \u001b[0mon\u001b[0m \u001b[0;36m2005\u001b[0m \u001b[0mNov\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mIEEE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m 
\"\"\"\n\u001b[0;32m---> 37\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfloydTransformation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfloydTransformation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'bond_type'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/utils.py\u001b[0m in \u001b[0;36mfloydTransformation\u001b[0;34m(G, edge_weight)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0mBorgwardt\u001b[0m \u001b[0mKM\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKriegel\u001b[0m \u001b[0mHP\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mShortest\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpath\u001b[0m \u001b[0mkernels\u001b[0m \u001b[0mon\u001b[0m \u001b[0mgraphs\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mInData\u001b[0m \u001b[0mMining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFifth\u001b[0m \u001b[0mIEEE\u001b[0m \u001b[0mInternational\u001b[0m \u001b[0mConference\u001b[0m \u001b[0mon\u001b[0m \u001b[0;36m2005\u001b[0m \u001b[0mNov\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mIEEE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \"\"\"\n\u001b[0;32m---> 58\u001b[0;31m \u001b[0mspMatrix\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloyd_warshall_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_nodes_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnodes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/algorithms/shortest_paths/dense.py\u001b[0m in \u001b[0;36mfloyd_warshall_numpy\u001b[0;34m(G, nodelist, weight)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;31m# nonedges are not given the value 0 as well.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m A = nx.to_numpy_matrix(G, nodelist=nodelist, multigraph_weight=min,\n\u001b[0;32m---> 54\u001b[0;31m weight=weight, nonedge=np.inf)\n\u001b[0m\u001b[1;32m 55\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0mI\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0midentity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mto_numpy_matrix\u001b[0;34m(G, nodelist, dtype, order, multigraph_weight, weight, 
nonedge)\u001b[0m\n\u001b[1;32m 446\u001b[0m A = to_numpy_array(G, nodelist=nodelist, dtype=dtype, order=order,\n\u001b[1;32m 447\u001b[0m \u001b[0mmultigraph_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmultigraph_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 448\u001b[0;31m nonedge=nonedge)\n\u001b[0m\u001b[1;32m 449\u001b[0m \u001b[0mM\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mM\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mto_numpy_array\u001b[0;34m(G, nodelist, dtype, order, multigraph_weight, weight, nonedge)\u001b[0m\n\u001b[1;32m 1061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1062\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnodelist\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1063\u001b[0;31m \u001b[0mnodelist\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1064\u001b[0m \u001b[0mnodeset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodelist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1065\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodelist\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m 
\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodeset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: 'int' object is not iterable" ] } ], @@ -1541,411 +1729,6 @@ "weisfeilerlehmankernel(G1, G2, height = 2)\n", "# Kmatrix = weisfeilerlehmankernel(G1, G2)" ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "185" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "len(dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. \n", - "\n", - "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\n", - "\n", - " --- calculating kernel matrix when subtree height = 0 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 
104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'sp'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, *args)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_weisfeilerlehmankernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36m_weisfeilerlehmankernel_do\u001b[0;34m(G1, G2, height)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# calculate kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# change your base kernel here (and one more before)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;31m# get label sets of both graphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spkernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mfor\u001b[0m 
\u001b[0me2\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "# Author: Elisabetta Ghisu\n", - "# test of WL subtree kernel\n", - "\n", - "\"\"\"\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. 
\n", - "\n", - "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\"\"\"\n", - "\n", - "print(__doc__)\n", - "\n", - "import sys\n", - "import os\n", - "import pathlib\n", - "sys.path.insert(0, \"../\")\n", - "from tabulate import tabulate\n", - "\n", - "import random\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from sklearn.kernel_ridge import KernelRidge # 0.17\n", - "from sklearn.metrics import accuracy_score, mean_squared_error\n", - "from sklearn import svm\n", - "\n", - "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", - "from pygraph.utils.graphfiles import loadDataset\n", - "\n", - "val_means_height = []\n", - "val_stds_height = []\n", - "test_means_height = []\n", - "test_stds_height = []\n", - "\n", - "\n", - "for height in np.linspace(0, 10, 11):\n", - " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n", - "\n", - " print('\\n Loading dataset from file...')\n", - " dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", - " y = np.array(y)\n", - " print(y)\n", - "\n", - " # setup the parameters\n", - " model_type = 'regression' # Regression or classification problem\n", - " print('\\n --- This is a %s problem ---' % model_type)\n", - "\n", - " datasize = len(dataset)\n", - " trials = 100 # Trials for hyperparameters random search\n", - " splits = 10 # Number of splits of the data\n", - " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", - " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", - " random.seed(20) # Set the seed for uniform parameter distribution\n", - "\n", - " # set the output path\n", - " kernel_file_path = 'kernelmatrices_weisfeilerlehman_acyclic/'\n", - " if not os.path.exists(kernel_file_path):\n", - " 
os.makedirs(kernel_file_path)\n", - "\n", - "\n", - " \"\"\"\n", - " - Here starts the main program\n", - " - First we permute the data, then for each split we evaluate corresponding performances\n", - " - In the end, the performances are averaged over the test sets\n", - " \"\"\"\n", - "\n", - " # save kernel matrices to files / read kernel matrices from files\n", - " kernel_file = kernel_file_path + 'km.ds'\n", - " path = pathlib.Path(kernel_file)\n", - " # get train set kernel matrix\n", - " if path.is_file():\n", - " print('\\n Loading the kernel matrix from file...')\n", - " Kmatrix = np.loadtxt(kernel_file)\n", - " print(Kmatrix)\n", - " else:\n", - " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height), base_kernel = 'sp')\n", - " print(Kmatrix)\n", - " print('\\n Saving kernel matrix to file...')\n", - "# np.savetxt(kernel_file, Kmatrix)\n", - "\n", - " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", - " val_split = []\n", - " test_split = []\n", - "\n", - " # For each split of the data\n", - " for j in range(10, 10 + splits):\n", - " # print('\\n Starting split %d...' 
% j)\n", - "\n", - " # Set the random set for data permutation\n", - " random_state = int(j)\n", - " np.random.seed(random_state)\n", - " idx_perm = np.random.permutation(datasize)\n", - " # print(idx_perm)\n", - "\n", - " # Permute the data\n", - " y_perm = y[idx_perm] # targets permutation\n", - " # print(y_perm)\n", - " Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", - " # print(Kmatrix_perm)\n", - " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", - "\n", - " # Set the training, validation and test\n", - " # Note: the percentage can be set up by the user\n", - " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", - " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", - " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", - " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", - "\n", - " # Split the kernel matrix\n", - " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", - " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", - " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", - "\n", - " # Split the targets\n", - " y_train = y_perm[0:num_train]\n", - "\n", - " # Normalization step (for real valued targets only)\n", - " if model_type == 'regression':\n", - " # print('\\n Normalizing output y...')\n", - " y_train_mean = np.mean(y_train)\n", - " y_train_std = np.std(y_train)\n", - " y_train = (y_train - y_train_mean) / float(y_train_std)\n", - " # print(y)\n", - "\n", - " y_val = y_perm[num_train:(num_train + num_val)]\n", - " y_test = y_perm[(num_train + num_val):datasize]\n", - "\n", - " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_train = []\n", - " perf_all_test = []\n", - "\n", - " # For each parameter trial\n", - " for i in range(trials):\n", - " # For regression use the 
Kernel Ridge method\n", - " if model_type == 'regression':\n", - " # print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", - "\n", - " # Fit the kernel ridge model\n", - " KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", - " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", - " KR.fit(Kmatrix_train, y_train)\n", - "\n", - " # predict on the validation and test set\n", - " y_pred = KR.predict(Kmatrix_val)\n", - " y_pred_test = KR.predict(Kmatrix_test)\n", - " # print(y_pred)\n", - "\n", - " # adjust prediction: needed because the training targets have been normalizaed\n", - " y_pred = y_pred * float(y_train_std) + y_train_mean\n", - " # print(y_pred)\n", - " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", - " # print(y_pred_test)\n", - "\n", - " # root mean squared error on validation\n", - " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", - " perf_all_val.append(rmse)\n", - "\n", - " # root mean squared error in test \n", - " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", - " perf_all_test.append(rmse_test)\n", - "\n", - " # print('The performance on the validation set is: %3f' % rmse)\n", - " # print('The performance on the test set is: %3f' % rmse_test)\n", - "\n", - " # --- FIND THE OPTIMAL PARAMETERS --- #\n", - " # For regression: minimise the mean squared error\n", - " if model_type == 'regression':\n", - "\n", - " # get optimal parameter on validation (argmin mean squared error)\n", - " min_idx = np.argmin(perf_all_test)\n", - " alpha_opt = alpha_grid[min_idx]\n", - "\n", - " # performance corresponding to optimal parameter on val\n", - " perf_val_opt = perf_all_val[min_idx]\n", - "\n", - " # corresponding performance on test for the same parameter\n", - " perf_test_opt = perf_all_test[min_idx]\n", - "\n", - " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", - " # print('The best performance 
on the validation set is: %3f' % perf_val_opt)\n", - " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", - "\n", - " # append the best performance on validation\n", - " # at the current split\n", - " val_split.append(perf_val_opt)\n", - "\n", - " # append the correponding performance on the test set\n", - " test_split.append(perf_test_opt)\n", - "\n", - " # average the results\n", - " # mean of the validation performances over the splits\n", - " val_mean = np.mean(np.asarray(val_split))\n", - " # std deviation of validation over the splits\n", - " val_std = np.std(np.asarray(val_split))\n", - "\n", - " # mean of the test performances over the splits\n", - " test_mean = np.mean(np.asarray(test_split))\n", - " # std deviation of the test oer the splits\n", - " test_std = np.std(np.asarray(test_split))\n", - "\n", - " print('\\n Mean performance on val set: %3f' % val_mean)\n", - " print('With standard deviation: %3f' % val_std)\n", - " print('\\n Mean performance on test set: %3f' % test_mean)\n", - " print('With standard deviation: %3f' % test_std)\n", - " \n", - " val_means_height.append(val_mean)\n", - " val_stds_height.append(val_std)\n", - " test_means_height.append(test_mean)\n", - " test_stds_height.append(test_std)\n", - " \n", - "print('\\n') \n", - "print(tabulate({'height': np.linspace(1, 12, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# a = [0, 1, 3, 2]\n", - "# b = [3, 2, 1, 0]\n", - "# print(1 if a == b else 0)\n", - "\n", - "# max(1 ,2)\n", - "\n", - "# x = [ 'r', 'a', 's' ]\n", - "# x.sort()\n", - "# print(x)\n", - "\n", - "# def test1(*args, base = 'subtree'):\n", - "# if base == 
'subtree':\n", - "# print('subtree')\n", - "# elif base == 'edge':\n", - "# print('edge')\n", - "# else:\n", - "# print('sp')\n", - "\n", - "# # function parameter usage test\n", - "# test1('hello', 'hi', base = 'edge')\n", - "\n", - "# # python matrix calculation speed test\n", - "# import numpy as np\n", - "# import time\n", - "\n", - "# size = 100\n", - "# m1 = np.random.random((size, size))\n", - "# m2 = np.random.random((size, size))\n", - "# itr = 1\n", - "\n", - "# start_time = time.time()\n", - "# for i in range(itr):\n", - "# np.dot(m1, m2)\n", - "# print(time.time() - start_time)\n", - "\n", - "# start_time = time.time()\n", - "# for j in range(itr):\n", - "# result = np.zeros((size, size))\n", - "# for i1 in range(size):\n", - "# for i2 in range(size):\n", - "# for i3 in range(size):\n", - "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n", - "# print(time.time() - start_time)\n", - "\n", - "# start_time = time.time()\n", - "# for i in range(itr):\n", - "# print(np.dot(m1, m2))\n", - "# print(time.time() - start_time)\n", - "\n", - "# start_time = time.time()\n", - "# for j in range(itr):\n", - "# result = np.zeros((size, size))\n", - "# for i1 in range(size):\n", - "# for i2 in range(size):\n", - "# for i3 in range(size):\n", - "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n", - "# print(result)\n", - "# print(time.time() - start_time)\n", - "\n", - "# help(np.sum)\n", - "\n", - "# test dict\n", - "import sys\n", - "from collections import Counter\n", - "import networkx as nx\n", - "sys.path.insert(0, \"../\")\n", - "from pygraph.utils.graphfiles import loadDataset\n", - "from pygraph.kernels.spkernel import spkernel\n", - "\n", - "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", - "G1 = dataset[15]\n", - "nx.get_node_attributes(G1, 'label')\n", - "listhqhq = list(nx.get_node_attributes(G1, 'label').values())\n", - "dicthaha = dict(Counter(listhqhq))\n", - "len(dicthaha)" - ] } ], "metadata": { diff --git 
a/notebooks/.ipynb_checkpoints/test_lib-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/test_lib-checkpoint.ipynb new file mode 100644 index 0000000..0b7ae52 --- /dev/null +++ b/notebooks/.ipynb_checkpoints/test_lib-checkpoint.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import paths\n", + "\n", + "from ged.GED import ged\n", + "from utils.graphfiles import loadDataset\n", + "from ged.costfunctions import RiesenCostFunction, BasicCostFunction\n", + "from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt8VNW99/HPL9BIohKpwOOFq6dclALaBqqvB60Va+Um\nhVgOnKKlx8oxIE89CtUAcrdQudTKJYrnqUFqq9ZE4FQQL209lIImKAWBogHtIcIxqcV4ISCXdf7Y\nEwzJJDNJZrJndr7v1ysvM3uv2fPbzuTLmrX3Xtucc4iISLCk+F2AiIjEnsJdRCSAFO4iIgGkcBcR\nCSCFu4hIACncRUQCSOEuIhJACncRkQBSuIuIBFBLv164bdu2rkuXLn69vIhIUtq2bdvfnXPtIrXz\nLdy7dOlCUVGRXy8vIpKUzOxv0bTTsIyISAAp3EVEAkjhLiISQAp3EZEAUriLiASQwl1EJIAU7iIi\nAaRwFxEJIN8uYhI5Q2kp5OXBjh1QXg4ZGdCnD/zwh9Au4sV4IlKNwl38VVgI8+fDhg3e46NHv1hX\nUAAzZ8KgQZCTA/36+VOjSBLSsIz4JzcXrr0W1qzxQr1qsANUVHjL1qzx2uXm+lGlSFJSz138kZsL\nkyfDkSOR2zrntZs82XucnR3f2kQCQD13aXqFhTWCfRmQCZwFjKvteZUBrwnnRCJSuEvTmz/fG3Kp\n4iJgOvCvkZ5bUeE9X0TqFDHczeyXZlZqZm/Vst7M7GEzKzazHWb2tdiXKYFRWuodPHXujMUjge8C\n50d6vnOwfj2UlcWpQJFgiKbnngfcWMf6QUC30M94QEe9pHZ5eY3fhllstiMSYBHD3Tn3X8A/6mgy\nHHjCebYC55nZhbEqUAJmx46aZ8XUV0UF7NwZm3pEAioWY+4XAweqPC4JLavBzMabWZGZFZXpa3Xz\nVF4em+0cPhyb7YgEVCzC3cIsc2GW4Zxb6ZzLdM5lttNVh81TRkZsttOmTWy2IxJQsQj3EqBjlccd\ngIMx2K4
EUZ8+0KpVjcUngKPAydDP0dCysNLSoHfveFUoEgixCPd1wK2hs2auBMqdc4disF0JonHj\nwi6eB6QBC4BfhX6fV9s2nKt1OyLiieZUyN8AW4AeZlZiZreZ2R1mdkeoyXpgP1AMPAZMiFu1kvza\nt/fmirEzR/Nm4Y3lVf2ZFe75ZjB4sCYTE4kg4vQDzrkxEdY7YGLMKpLgy8mBjRujm3qgurQ07/ki\nUiddoSpNr18/WLQI0tPr97z0dO95mZnxqUskQBTu4o/s7C8C3sKdcPWFk4BLS/Paa9Iwkago3MU/\n2dnw6qswYoR3Bk1a2pnr09KgVSt2XnIJ82+8UcEuUg+a8lf8lZkJ+fneXDF5ed6Vp4cPe+ex9+4N\n48bRNTWVZZdeynVbt3LllVf6XbFIUlC4S2Jo1w6mTAm7KgNYtGgR2dnZFBYW0rKlPrYikWhYRpLC\nmDFj+PKXv8yKFSv8LkUkKSjcJSmYGcuXL2fu3LkcPKgLoEUiUbhL0ujZsyfjx4/nnnvu8bsUkYSn\ncJekMm3aNLZu3cpLL73kdykiCU3hLkklPT2dpUuXMnHiRI4dO+Z3OSIJS+EuSWfo0KH06tWLBx98\n0O9SRBKWwl2S0i9+8Qt+8YtfsG/fPr9LEUlICndJSp06deInP/kJkyZNwrmw94YRadYU7pK07rrr\nLv72t7/x3HPP+V2KSMJRuEvSSk1NJTc3l7vuuotPP/3U73JEEorCXZLaNddcw7e+9S1mz57tdyki\nCUXhLklv4cKFrFq1ip07d/pdikjCULhL0mvfvj1z5swhOzubU6dO+V2OSEJQuEsg3H777Rw/fpxV\nq1b5XYpIQlC4SyC0aNGC3NxccnJy+PDDD/0uR8R3CncJjK997WuMGjWKHN1AW0ThLsEyd+5cnn/+\nebZu3ep3KSK+UrhLoGRkZLBo0SLuuOMOTpw44Xc5Ir5RuEvgjB49mrZt27J8+XK/SxHxjcJdAqfy\nrk3z5s3TXZuk2VK4SyD16NGDf/u3f+Puu+/2uxQRXyjcJbCmTZvG66+/rrs2SbMUVbib2Y1mttfM\nis3svjDrO5nZH8zsTTPbYWaDY1+qSP2kpaWxdOlSJkyYwNGjR/0uR6RJRQx3M2sBLAcGAZcBY8zs\nsmrNpgPPOOeuAEYDK2JdqEhDDBkyhN69e+uuTdLsRNNz7w8UO+f2O+c+B54Chldr44DWod8zAB3F\nkoTx0EMP8fDDD+uuTdKsRBPuFwMHqjwuCS2rahYw1sxKgPXApHAbMrPxZlZkZkVlZWUNKFek/jp1\n6sS9997LnXfeqbs2SbMRTbhbmGXV/0LGAHnOuQ7AYGC1mdXYtnNupXMu0zmX2a5du/pXK9JAd911\nFwcOHKCgoMDvUkSaRDThXgJ0rPK4AzWHXW4DngFwzm0BWgFtY1GgSCx86UtfOn3Xpk8++cTvckTi\nLppwLwS6mVlXM0vFO2C6rlqb/wYGApjZpXjhrnEXSShXX301AwcO1F2bpFmIGO7OuRPAncBGYA/e\nWTG7zGyOmd0UanYPcLuZ/QX4DTDOaXBTEtDChQt54okn2LFjh9+liMSV+ZXBmZmZrqioyJfXlubt\n0Ucf5YknnmDTpk2kpOg6PkkuZrbNOZcZqZ0+2dLs3H777Zw4cYK8vDy/SxGJG4W7NDspKSk88sgj\numuTBJrCXZqlK664gtGjR3PffTVm0xAJBIW7NFtz5sxh/fr1bNmyxe9SRGJO4S7NVkZGBosXLyY7\nO1t3bZLAUbhLs/bP//zPtG3blmXLlvldikhMKdylWTMzVqxYwbx583j//ff9LkckZhTu0ux1796d\n7Oxs3bVJAkXhLgJMnTqVwsJCXnzxRb9LEYkJhbsI3l2bli1bxsSJE3XXJgkEhbtIyODBg+nTpw8/\n+9nP/C5FpNEU7iJVPPTQQyxdupTi4mK/SxFpFIW7SBUdO3bkvvvu012bJ
Okp3EWq+fGPf8z7779P\nfn6+36WINJjCXaSayrs2/fu//7vu2iRJS+EuEsaAAQO4/vrrmTVrlt+liDSIwl2kFg8++CCrV6/W\nXZskKSncRWrRrl075s2bR3Z2NqdOnfK7HJF6UbiL1OFHP/oRJ0+e5PHHH/e7FJF6UbiL1CElJYXc\n3FymTp3K3//+d7/LEYmawl0kgiuuuIIxY8bork2SVBTuIlGYM2cOL7zwAn/+85/9LkUkKgp3kSi0\nbt1ad22SpKJwF4nSqFGjaN++PUuXLvW7FJGIFO4iUTIzli9fzgMPPEBJSYnf5YjUSeEuUg/du3dn\nwoQJumuTJLyowt3MbjSzvWZWbGZhTxkws1FmttvMdpnZr2NbpkjiyMnJYdu2bWzcuNHvUkRqFTHc\nzawFsBwYBFwGjDGzy6q16QbkAP/XOdcLuCsOtYokBN21SZJBND33/kCxc26/c+5z4ClgeLU2twPL\nnXOHAZxzpbEtUySxDBo0iL59+7JgwQK/SxEJK5pwvxg4UOVxSWhZVd2B7ma22cy2mtmNsSpQJFE9\n9NBDLFu2jHfeecfvUkRqiCbcLcyy6reoaQl0A64FxgD/YWbn1diQ2XgzKzKzorKysvrWKpJQdNcm\nSWTRhHsJ0LHK4w7AwTBt1jrnjjvn3gX24oX9GZxzK51zmc65zHbt2jW0ZpGE8eMf/5iDBw/y7LPP\n+l2KyBmiCfdCoJuZdTWzVGA0sK5amzXAtwDMrC3eMM3+WBYqkoiq3rXp448/9rsckdNaRmrgnDth\nZncCG4EWwC+dc7vMbA5Q5JxbF1p3g5ntBk4CU5xzH8azcJFEMWDAAG644QZmzZrFkiVL/C5H4q20\nFPLyYMcOKC+HjAzo0wd++ENIoBEJ82usMDMz0xUVFfny2iKxVlZWxle/+lVefPFF+vbtmzQBIPVQ\nWAjz58OGDd7jqqfBpqWBczBoEOTkQL9+cSvDzLY55zIjtlO4i8TGY489xtalS/mPf/on7IUXvIU+\nBYDEWG4uTJ4MFRXee1gbM+99XrQIsrPjUkq04a7pB0Ri5Lbjx1m2axdu7Vov1Ktf4FRR4S1bswau\nvdYLDEl8lcF+5EjdwQ7e+iNHvPY+v78Kd5FYyM0lZcoU0k6dIiUUAGOBC4HWeGcY/Edl2wQKAImg\nsPCLYK/iH8AI4GygM1BjvpXK99fH0QmFu0hj1RIAOcB7wMd4p5dNB7ZVbZAAASARzJ/vfeOqZiKQ\nCnwAPAlkA7uqN6qo8J7vE4W7SGPVEgC9gLNCv1voZ1/1Rj4HgNShtNQ7eFptKOYzIB+YC5wDDABu\nAlZXf75zsH49+HTBpsJdpDFqCYBKE4B0oCfeEM3g6g18DgCpQ15e2MVv450T3r3Ksr6E6bmDd4C1\nlu3EW8Tz3EWkDhH+cFcAS4EtwB/5oid/hsoAmDLljMXOuYg/frRrLq/5jf/8T7qEmfXzUyCj2rIM\n4JNw721FBezcGW5N3CncRRpjx46aZ8VU0wLvq/uvgFzg/1VvUFHB6p/8hB/ce2/YOWrMrM6faNrE\nul1zeM2v7NtHlzDv5zl4x1Gq+hg4t7YPwOHDta2JK4W7SGOUl0fd9ARhxtxDvj9kCP+ydm2N8BEf\njR0LTz5ZY3F3vPfyHb6YQOsveMdYwmrTJh7VRaQxd5HGyKj+Bd1Tinfjg0/x5uPYCPwGuK6WzaR8\n+cu0aNGClJQUBXui6NMHWrWqsfhsYCQwA+/g6mZgLXBLuG2kpUHv3nEssnYKd5HGqCUADG8IpgPQ\nBpgMPETNu9wAvgaA1GHcuFpXrQAqgPZ4c5znUkvP3bk6txNPCneRxqjlD7cd8CrwEd547E6825WF\n5WMASB3at/emigjzTerLeFPhfgb8N/Av4Z5vBoMH+zaXkMJdpDHqCICo+BwAEkFOjvfNqiHS0rzn\n+0ThLtJYSRwAEkG/ft4kYOnp9Xtee
rr3vMyI83vFjcJdpLGSOAAkCtnZX7y/kb6hmX3xvsZpVsho\nKdxFYiE7m/cmTeII4JIoACRK2dnw6qswYoR3AL36N7W0NG/5iBFeuwR4X3Weu0iM3LF9O/86ZQqj\n9u3zphQwO3POmcr53AcP9oZi1GNPLpmZkJ/vTRWRl+ddeXr4sHcee+/e3kHxBDp2onAXiYFNmzax\nd+9evrtuHaSmJk0ASAO0a1djqohEpHAXaSTnHNOmTWPmzJmkpqZ6C5MkACS4NOYu0kgvvvgipaWl\njB071u9SRE5TuIs0gnOO6dOnM2fOHFq21BdhSRwKd5FGWLNmDcePH+fmm2/2uxSRM6irIdJAJ0+e\n5P7772fBggWkpKifJIlFn0iRBnrqqac499xzGTJkiN+liNSgnrtIAxw/fpyZM2eycuVKTdErCUk9\nd5EGyMvLo3Pnzlx3XW0ztIv4Sz13kXo6evQoc+fO5ZlnnvG7FJFaRdVzN7MbzWyvmRWb2X11tLvZ\nzJyZ6bpqCaxHH32Uvn37cuWVV/pdikitIvbczawFsBz4NlACFJrZOufc7mrtzsW79+9r8ShUJBF8\n+umnzJ8/n40bN/pdikidoum59weKnXP7nXOf490aMtzdwuYCDwJ13wpeJIk9/PDDXHvttfTt29fv\nUkTqFM2Y+8XAgSqPS4BvVG1gZlcAHZ1zvzOzyTGsTyRhfPTRR/z85z/nT3/6k9+liEQUTc893Hle\n7vRKsxTg58A9ETdkNt7MisysqKysLPoqRRLAokWLGDZsGD169PC7FJGIoum5lwAdqzzuABys8vhc\n4KvAH0Pn+14ArDOzm5xzRVU35JxbCawEyMzMdIgkidLSUnJzc9m2bZvfpYhEJZqeeyHQzcy6mlkq\nMBpYV7nSOVfunGvrnOvinOsCbAVqBLtIMluwYAFjxoyhS5cufpciEpWIPXfn3AkzuxPYCLQAfumc\n22Vmc4Ai59y6urcgktxKSkrIy8tj165dfpciErWoLmJyzq0H1ldbNqOWttc2viyRxDFv3jx+9KMf\nceGFF/pdikjUdIWqSB3279/Pb3/7W95++22/SxGpF80tI1KHWbNmMWnSJM4//3y/SxGpF/XcRWqx\ne/duXnjhBYqLi/0uRaTe1HMXqcWMGTOYMmUKrVu39rsUkXpTz10kjG3btrFlyxaeeOIJv0sRaRD1\n3EXCmD59OlOnTiU9Pd3vUkQaRD13kWr+9Kc/sWfPHtauXet3KSINpp67SBXOOaZNm8bMmTNJTU31\nuxyRBlO4i1Tx0ksv8cEHH3DLLbf4XYpIoyjcRUIqe+2zZ8+mZUuNWEpyU7iLhKxdu5bjx4/zve99\nz+9SRBpN3RMR4OTJk9x///389Kc/JSVFfR5JfvoUiwBPP/00Z599NkOHDvW7FJGYUM9dmr3jx48z\nc+ZMHnnkEUI3nBFJeuq5S7O3atUqOnbsyMCBA/0uRSRm1HOXZu3YsWPMmTOHp59+2u9SRGJKPXdp\n1h599FH69u3LVVdd5XcpIjGlnrs0W5999hnz589nw4YNfpciEnPquUuz9fDDD3PNNddw+eWX+12K\nSMyp5y7N0kcffcSSJUvYtGmT36WIxIV67tIsLV68mKFDh9KzZ0+/SxGJC/XcpdkpKytjxYoVbNu2\nze9SROJGPXdpdhYsWMDo0aPp0qWL36WIxE3we+6lpZCXBzt2QHk5ZGRAnz7wwx9Cu3Z+VydN7P33\n3+fxxx/nrbfe8rsUkbgy55wvL5yZmemKiori9wKFhTB/PlSe5nb06Bfr0tLAORg0CHJyoF+/+NUh\nCSU7O5tzzjmHhQsX+l2KSIOY2TbnXGakdsHsuefmwuTJUFHhhXh1FRXef9esgY0bYdEiyM5u2hql\nye3fv59nnnmGvXv3+l2KSNwFL9wrg/3IkchtnfPaTZ7sPVbAB9rs2bOZNGkSbdu29bsUkbiL6oCq\nm
d1oZnvNrNjM7guz/m4z221mO8zsFTPrHPtSo1BYWCPYjwG3AZ2Bc4ErgBrXI1YGfDyHicRXe/bs\nYcOGDdx9991+lyLSJCKGu5m1AJYDg4DLgDFmdlm1Zm8Cmc65PsCzwIOxLjQq8+d/MeQScgLoCLwK\nlANzgVHAe9WfW1HhPV8CacaMGUyePJnWrVv7XYpIk4im594fKHbO7XfOfQ48BQyv2sA59wfnXGV3\neSvQIbZlRqG01Dt4Wm2M/WxgFtAFb2eHAl2BGmc4Owfr10NZWdxLlab1xhtvsHnzZu68806/SxFp\nMtGE+8XAgSqPS0LLanMbYUY+4i4vL6pmHwBvA73CrTSLejuSPKZPn87UqVNJT0/3uxSRJhPNAdVw\nt6YJe/6kmY0FMoFv1rJ+PDAeoFOnTlGWGKUdO8483TGM48D3gR8AYS86r6iAnTtjW5f4avPmzeze\nvZvnnnvO71JEmlQ0PfcSvGHrSh2Ag9Ubmdn1wDTgJufcsXAbcs6tdM5lOucy28X6AqLy8jpXnwJu\nAVKBZXU1PHw4djWJr5xzTJs2jRkzZnDWWWf5XY5Ik4om3AuBbmbW1cxSgdHAuqoNzOwK4FG8YC+N\nfZlRyMiodZXDGyv6AMgHvlTXdtq0iWlZ4p+XX36ZQ4cOceutt/pdikiTixjuzrkTwJ3ARmAP8Ixz\nbpeZzTGzm0LNFgLnAL81s+1mtq6WzcVPnz7QqlXYVdl4hf8nkFbHJo61aMGbJ0/y8ccfx6FAaUqV\nvfbZs2fTsmXwLucQiSSq89ydc+udc92dc//knHsgtGyGc25d6PfrnXP/xzl3eejnprq3GAfjxoVd\n/De8rxTbgQvw/gU6B3gyTNsWKSksLC2lQ4cODB06lMcff5wPP/wwTgVLPK1bt45jx44xatQov0sR\n8UVwZoVs396bK8bOPP7bGW9Y5ijwaZWf71d/vhkthw3j1y+9RElJCd///vd5/vnnueSSS7j++utZ\nsWIFhw4daoIdkcY6deoU999/P3PnziUlJTgfcZH6CNYnPyfHmxSsIdLSvOcDrVu3ZsyYMTz77LMc\nOnSIiRMnsmXLFnr16sWAAQNYsmQJ7733Xuzqlph6+umnSU9PZ9iwYX6XIuKbYIV7v37eJGD1PZ85\nPd17XmbNidbS09MZMWIEq1ev5n/+53+YNm0ae/bsoX///nz961/npz/9KX/9619jtAPSWCdOnGDm\nzJk88MADmIU7i1ekeQhWuIM3+VdlwEf64zb7ItijmDQsNTWVQYMG8dhjj3Hw4EEWL17MoUOHGDhw\nIL169WLGjBls374dv6ZRFli1ahUdOnRg4MCBfpci4qvgzudeVOTNFbN+vRfiVeecqZzPffBgbygm\nTI+9Pk6dOsXrr79Ofn4++fn5mBlZWVmMHDmS/v37a9y3iRw7dozu3bvz1FNPcdVVV/ldjkhcRDuf\ne3DDvVJZmTelwM6d3gVKbdpA797e2TVxuBOTc46//OUv5OfnU1BQQHl5OSNGjCArK4urr76aFi1a\nxPw1xbN06VI2btzI7373O79LEYkbhXuC+Otf/0pBQQH5+fkcOHCA4cOHk5WVxXXXXUdqaqrf5QXG\nZ599xle+8hU2bNjA5Zdf7nc5InETbbhrvCDOevbsydSpU9m2bRuvv/46l156KXPnzuWCCy7glltu\n4bnnnuNINDcWkTotXbqUq6++WsEuEqKeu08OHjzImjVrKCgooKioiG9/+9uMHDmSIUOGaM7xevro\no4/o1q0bmzZtomfPsFPCiQSGeu4J7qKLLmLChAm8/PLLFBcXM2jQIJ588kldHdsAS5YsYciQIQp2\nkSrUc08w5eXlrF+/nvz8fF566SX69etHVlYW3/3ud7nwwgv9Li/hlJWV0bNnT4qKiujatavf5YjE\nnQ6oBsCRI0fYuHEj+fn5PP/88/Tq1YusrCxGjBhBly5d/C4vIUy
ePJkjR46wYsUKv0sRaRIK94D5\n/PPPeeWVVygoKGDt2rV06tSJkSNHkpWVRY8ePfwuzxcHDx6kd+/e7Ny5k4suusjvckSahMI9wE6c\nOMGmTZsoKCigoKCA8847j6ysLLKysujTp0+zuex+woQJnH322SxcuNDvUkSajMK9mTh16hSvvfba\n6XPpU1JSGDlyZOCvjn333XfJzMxk7969tG3b1u9yRJqMwr0Zqnp1bH5+Ph9//PHpoA/a1bHjxo2j\nc+fOzJ492+9SRJqUwl3Ys2fP6aGbIF0du2fPHr75zW/yzjvvkFHH7RVFgkjhLmd49913Twf9nj17\nGDJkCFlZWXznO98hraFz4Ptk1KhRfP3rX+fee+/1uxSRJqdwl1pVXh2bn59PUVERN9xwA1lZWQwe\nPDjhr4598803GTJkCO+88w5nn3223+WINDldoSq1qrw69pVXXmHfvn0MGjSI1atX06FDB4YNG5bQ\nV8dOnz6dnJwcBbtIBOq5y2nl5eU8//zz5Ofn8/LLL9O/f39GjhzJiBEjuOCCC/wujz//+c+MGTOG\nt99+m7POOsvvckR8oWEZaZQjR47wwgsvUFBQcMbVsSNHjqRz585NXo9zjuuuu46xY8dy2223Nfnr\niyQKhbvEzLFjx/j9739Pfn4+a9eupXPnzqeDPuZXx5aWejdX2bEDysshIwP69OHVSy7h9qlT2b17\nNy1btozta4okEYW7xEXl1bH5+fk899xztGnT5vQ0CI26Oraw0Lst4oYN3uOjR0+vcmlpfH70KKWZ\nmXRcvty7EbpIM6Vwl7irvDq28qKpFi1anA76fv36RX91bG4uTJ7s3ee2js+jM8PS0qK+oblIECnc\npUk559i+ffvpaRA++eST0/eOHTBgQO1Xx1YGe33uRpWeroCXZkvhLr6qvDo2Pz+f999/n+HDhzNy\n5Mgzr44tLIRrr6012N8BegM3A7+qvjI9HV59FTIjfsZFAiWm57mb2Y1mttfMis3svjDrzzKzp0Pr\nXzOzLvUvWYLk0ksvZdq0abzxxhts3bqVHj16nL537K233sqaNWs4OW+eNxRTi4lAraPrFRXeGL2I\nhBUx3M2sBbAcGARcBowxs8uqNbsNOOyc+wrwc+BnsS5UklfXrl2555572Lx5M2+99Rbf+MY3WL14\nMcfXrat1jP0p4DxgYG0bdQ7Wr4eysjhVLZLcoum59weKnXP7nXOf4/3dDa/WZjiwKvT7s8BAay6T\niku9XHTRRUycOJH8YcM4q1WrsG0+BmYAiyNtzMw7bVJEaogm3C8GDlR5XBJaFraNc+4EUA6cH4sC\nJaB27MCqnO5Y1f14XwU7RtpGRQXs3BnjwkSCIZqrQcL1wKt/l46mDWY2HhgP0KlTpyheWgKrvDzs\n4u3Ay8Cb0W7n8OEYFSQSLNGEewlndqI6AAdraVNiZi2BDOAf1TfknFsJrATvbJmGFCwBUcs87H8E\n3gMq/+n/FDgJ7AbeCPeENm1iXppIEEQzLFMIdDOzrmaWCowG1lVrsw74Qej3m4HfO7/OsZTk0KcP\nhBlzHw/sw+vBbwfuAIYAG8NtIy0NeveOY5EiyStiuIfG0O/E+/vaAzzjnNtlZnPM7KZQs/8PnG9m\nxcDdQI3TJUXOMG5c2MXpwAVVfs4BWgHtwjV2rtbtiDR3Uc3A5JxbD6yvtmxGld+PAt+LbWkSaO3b\nw6BBsGZNnVMOzKpthRkMHgztwsa+SLOnm3WIf3JyvKGVhkhL854vImEp3MU//fp5c8Skp9fveZVz\ny2jqAZFaaWJs8Vfl5F9RzAqJmddj16RhIhGp5y7+y872JgEbMcI7g6b6UE1amrd8xAivnYJdJCL1\n3CUxZGZCfr43V0xennfl6eHD3nnsvXt7Z8Xo4KlI1BTukljatYMpU/yuQiTpaVhGRCSAFO4iIgGk\ncBcRCSCFu4hIACncRUQCSOE
uIhJACncRkQBSuIuIBJD5dU8NMysD/tbEL9sW+HsTv6YftJ/B0Rz2\nEbSf9dHZORfxcm3fwt0PZlbknAv8VILaz+BoDvsI2s940LCMiEgAKdxFRAKouYX7Sr8LaCLaz+Bo\nDvsI2s+Ya1Zj7iIizUVz67mLiDQLgQx3M7vRzPaaWbGZ3Rdm/Vlm9nRo/Wtm1qXpq2y8KPbzbjPb\nbWY7zOwVM+vsR52NEWkfq7S72cycmSXlGRfR7KeZjQq9n7vM7NdNXWMsRPGZ7WRmfzCzN0Of28F+\n1NkYZvYUS70MAAADKUlEQVRLMys1s7dqWW9m9nDo/8EOM/taXApxzgXqB2gB7AMuAVKBvwCXVWsz\nAXgk9Pto4Gm/647Tfn4LSA/9np1s+xnNPobanQv8F7AVyPS77ji9l92AN4E2ocft/a47Tvu5EsgO\n/X4Z8J7fdTdgP68Bvga8Vcv6wcAGwIArgdfiUUcQe+79gWLn3H7n3OfAU8Dwam2GA6tCvz8LDDQz\na8IaYyHifjrn/uCcOxJ6uBXo0MQ1NlY07yXAXOBB4GhTFhdD0ezn7cBy59xhAOdcaRPXGAvR7KcD\nWod+zwAONmF9MeGc+y/gH3U0GQ484TxbgfPM7MJY1xHEcL8YOFDlcUloWdg2zrkTQDlwfpNUFzvR\n7GdVt+H1FpJJxH00syuAjs653zVlYTEWzXvZHehuZpvNbKuZ3dhk1cVONPs5CxhrZiXAemBS05TW\npOr7t9sgQbyHargeePVTgqJpk+ii3gczGwtkAt+Ma0WxV+c+mlkK8HNgXFMVFCfRvJct8YZmrsX7\nBrbJzL7qnPsozrXFUjT7OQbIc84tNrOrgNWh/TwV//KaTJPkTxB77iVAxyqPO1Dzq93pNmbWEu/r\nX11foxJRNPuJmV0PTANucs4da6LaYiXSPp4LfBX4o5m9hzd+uS4JD6pG+5ld65w77px7F9iLF/bJ\nJJr9vA14BsA5twVohTcfS5BE9bfbWEEM90Kgm5l1NbNUvAOm66q1WQf8IPT7zcDvXehIRxKJuJ+h\nIYtH8YI9Gcdo69xH51y5c66tc66Lc64L3nGFm5xzRf6U22DRfGbX4B0gx8za4g3T7G/SKhsvmv38\nb2AggJldihfuZU1aZfytA24NnTVzJVDunDsU81fx+8hynI5WDwbexjsyPy20bA7eHz54H5jfAsXA\n68Alftccp/18GfgA2B76Wed3zbHex2pt/0gSni0T5XtpwBJgN7ATGO13zXHaz8uAzXhn0mwHbvC7\n5gbs42+AQ8BxvF76bcAdwB1V3svlof8HO+P1mdUVqiIiARTEYRkRkWZP4S4iEkAKdxGRAFK4i4gE\nkMJdRCSAFO4iIgGkcBcRCSCFu4hIAP0v8ncXPBCruC4AAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 1, 4]\n" + ] + } + ], + "source": [ + "import networkx as nx\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")\n", + "nx.draw_networkx(dataset[12])\n", + "plt.show()\n", + "print(list(dataset[12][3]))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + 
"autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "cf = BasicCostFunction(1,3,1,3)\n", + "N=len(dataset)\n", + "N=10\n", + "ged_distances = np.zeros((N,N))\n", + "for i in range(0,N):\n", + " for j in range(i,N):\n", + " ged_distances[j,i] = ged_distances[i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n", + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n", + "(0, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))\n" + ] + } + ], + "source": [ + "G1=dataset[12]\n", + "G2=dataset[12]\n", + "\n", + "print(ged(G1,G2))\n", + "print(ged(G1,G2,method='Basic'))\n", + "print(ged(G1,G2,method='Riesen'))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "autoscroll": false, + "ein.tags": "worksheet-0", + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0: {0: [0], 3: [0, 3], 1: [0, 3, 1], 4: [0, 3, 4], 2: [0, 3, 4, 2]}, 1: {1: [1], 3: [1, 3], 0: [1, 3, 0], 4: [1, 3, 4], 2: [1, 3, 4, 2]}, 2: {2: [2], 4: [2, 4], 3: [2, 4, 3], 0: [2, 4, 3, 0], 1: [2, 4, 3, 1]}, 3: {3: [3], 0: [3, 0], 1: [3, 1], 4: [3, 4], 2: [3, 4, 2]}, 4: {4: [4], 2: [4, 2], 3: [4, 3], 0: [4, 3, 0], 1: [4, 3, 1]}}\n", + "[[ 0. 2. 3. 1. 2.]\n", + " [ 2. 0. 3. 1. 2.]\n", + " [ 3. 3. 0. 2. 1.]\n", + " [ 1. 1. 2. 0. 1.]\n", + " [ 2. 2. 1. 1. 
0.]]\n" + ] + } + ], + "source": [ + "l = nx.shortest_path(G1)\n", + "print(l)\n", + "\n", + "distances = np.zeros((G1.number_of_nodes(),G1.number_of_nodes()))\n", + "for i in l.keys():\n", + " for j in l[i].keys():\n", + " distances[i,j] = len(l[i][j])-1\n", + "\n", + "print(distances)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + }, + "name": "test_lib.ipynb" + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/run_cyclicpatternkernel.ipynb b/notebooks/run_cyclicpatternkernel.ipynb new file mode 100644 index 0000000..2194700 --- /dev/null +++ b/notebooks/run_cyclicpatternkernel.ipynb @@ -0,0 +1,1252 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 0.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 373.39it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 26367.08it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.18705153465270996 seconds ---\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 83%|████████▎ | 834/1000 [00:00<00:00, 2077.02it/s]\n", + " Mean performance on train set: 0.549180\n", + "With standard deviation: 0.016798\n", + "\n", + " Mean performance on test set: 0.642857\n", + "With standard deviation: 0.146385\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2083.52it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 50.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 231.33it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 15078.65it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3006291389465332 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 81%|████████ | 808/1000 [00:00<00:00, 2005.12it/s]\n", + " Mean performance on train set: 0.698361\n", + "With standard deviation: 0.116889\n", + "\n", + " Mean performance on test set: 0.871429\n", + "With standard deviation: 0.100000\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2024.59it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 100.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 224.68it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13144.65it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.30983662605285645 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 
8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 82%|████████▏ | 821/1000 [00:00<00:00, 2050.17it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2050.63it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 150.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 219.10it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12644.09it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31808018684387207 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 993/1000 [00:00<00:00, 1993.90it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1977.95it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 200.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 219.08it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14177.69it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31757450103759766 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 980/1000 [00:00<00:00, 1969.03it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1951.39it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 250.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 218.22it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12697.56it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3192298412322998 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1878.10it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1875.67it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 300.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 206.81it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12364.00it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.33614420890808105 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 970/1000 [00:00<00:00, 1947.13it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1934.26it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 350.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 189.65it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13989.93it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3654501438140869 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1875.81it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1881.94it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 400.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 220.95it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14281.34it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3142852783203125 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 952/1000 [00:00<00:00, 1900.77it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1900.46it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 450.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 212.09it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 11357.62it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3287320137023926 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 981/1000 [00:00<00:00, 1956.30it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1952.54it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 500.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.14it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12536.27it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3347315788269043 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 979/1000 [00:00<00:00, 1970.30it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1950.19it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 550.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.06it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13816.44it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3341798782348633 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 974/1000 [00:00<00:00, 1930.44it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1937.89it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 600.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 213.56it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13048.43it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.32569050788879395 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 972/1000 [00:00<00:00, 1924.82it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1935.68it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 650.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 216.51it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 9669.54it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3229689598083496 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 983/1000 [00:00<00:00, 1963.08it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1960.32it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 700.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.61it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13485.23it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.33377623558044434 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 81%|████████ | 812/1000 [00:00<00:00, 2020.52it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2029.28it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 750.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.54it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13952.29it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31093406677246094 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 82%|████████▎ | 825/1000 [00:00<00:00, 2053.32it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2055.77it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 800.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.35it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13220.82it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31124091148376465 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 96%|█████████▌| 959/1000 [00:00<00:00, 1925.40it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1912.78it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 850.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 202.00it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12487.42it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.34392237663269043 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1869.41it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1883.23it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 900.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 217.23it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13956.38it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.32010626792907715 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 989/1000 [00:00<00:00, 1978.29it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1968.44it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 950.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 228.56it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14794.72it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.30414795875549316 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 83%|████████▎ | 829/1000 [00:00<00:00, 2063.72it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2068.06it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 1000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.02it/s]\n", + "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13702.27it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3120880126953125 seconds ---\n", + "[[ 8. 8. 8. ... 8. 8. 8.]\n", + " [ 8. 
8. 8. ... 8. 8. 8.]\n", + " [ 8. 8. 8. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 8. 8. ... 10. 9. 9.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]\n", + " [ 8. 8. 8. ... 9. 10. 10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 82%|████████▎ | 825/1000 [00:00<00:00, 2054.81it/s]\n", + " Mean performance on train set: 0.732787\n", + "With standard deviation: 0.082637\n", + "\n", + " Mean performance on test set: 0.900000\n", + "With standard deviation: 0.111575\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2022.62it/s]\n", + "\n", + "\n", + " cycle_bound accur_test std_test accur_train std_train k_time\n", + "------------- ------------ ---------- ------------- ----------- --------\n", + " 0 0.642857 0.146385 0.54918 0.0167983 0.187052\n", + " 50 0.871429 0.1 0.698361 0.116889 0.300629\n", + " 100 0.9 0.111575 0.732787 0.0826366 0.309837\n", + " 150 0.9 0.111575 0.732787 0.0826366 0.31808\n", + " 200 0.9 0.111575 0.732787 0.0826366 0.317575\n", + " 250 0.9 0.111575 0.732787 0.0826366 0.31923\n", + " 300 0.9 0.111575 0.732787 0.0826366 0.336144\n", + " 350 0.9 0.111575 0.732787 0.0826366 0.36545\n", + " 400 0.9 0.111575 0.732787 0.0826366 0.314285\n", + " 450 0.9 0.111575 0.732787 0.0826366 0.328732\n", + " 500 0.9 0.111575 0.732787 0.0826366 0.334732\n", + " 550 0.9 0.111575 0.732787 0.0826366 0.33418\n", + " 600 0.9 0.111575 0.732787 0.0826366 0.325691\n", + " 650 0.9 0.111575 0.732787 0.0826366 0.322969\n", + " 700 0.9 0.111575 0.732787 0.0826366 0.333776\n", + " 750 0.9 0.111575 0.732787 0.0826366 0.310934\n", + " 800 0.9 0.111575 0.732787 0.0826366 0.311241\n", + " 850 0.9 0.111575 0.732787 0.0826366 0.343922\n", + " 900 0.9 0.111575 0.732787 0.0826366 0.320106\n", + " 950 0.9 0.111575 0.732787 0.0826366 0.304148\n", + " 1000 0.9 0.111575 0.732787 0.0826366 0.312088\n" + ] + } + ], + "source": [ + "# MAO dataset (node labeled, edge labeled, undirected, cyclic + linear, classification)\n", + "%load_ext 
line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/MAO/dataset.ds'\n", + "kernel_file_path = 'kernelmatrices_cyclicpattern_mao/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 500, 21), normalize = False,\n", + " model_type = 'classification')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The line_profiler extension is already loaded. To reload it, use:\n", + " %reload_ext line_profiler\n", + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 0.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 176.07it/s]\n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 18331.07it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5411422252655029 seconds ---\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 951/1000 [00:00<00:00, 1898.18it/s]\n", + " Mean performance on train set: 0.629762\n", + "With standard deviation: 0.013521\n", + "\n", + " Mean performance on test set: 0.610000\n", + "With standard deviation: 0.113578\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1889.49it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 10.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 165.16it/s]\n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 16217.54it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5770719051361084 seconds ---\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▍| 940/1000 [00:00<00:00, 1876.61it/s]\n", + " Mean performance on train set: 0.629762\n", + "With standard deviation: 0.013521\n", + "\n", + " Mean performance on test set: 0.610000\n", + "With standard deviation: 0.113578\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1866.80it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 20.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 165.21it/s]\n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 16888.61it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5768516063690186 seconds ---\n", + "[[3. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 93%|█████████▎| 926/1000 [00:00<00:00, 1837.36it/s]\n", + " Mean performance on train set: 0.629762\n", + "With standard deviation: 0.013521\n", + "\n", + " Mean performance on test set: 0.610000\n", + "With standard deviation: 0.113578\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1841.13it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 30.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 171.51it/s]\n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 17701.46it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5560076236724854 seconds ---\n", + "[[3. 3. 3. ... 0. 0. 0.]\n", + " [3. 4. 4. ... 0. 0. 0.]\n", + " [3. 4. 4. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 92%|█████████▏| 923/1000 [00:00<00:00, 1845.18it/s]\n", + " Mean performance on train set: 0.633333\n", + "With standard deviation: 0.015793\n", + "\n", + " Mean performance on test set: 0.640000\n", + "With standard deviation: 0.111355\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1836.56it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 40.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 159.66it/s]\n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 17703.84it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5963354110717773 seconds ---\n", + "[[3. 3. 3. ... 0. 0. 0.]\n", + " [3. 4. 4. ... 0. 0. 0.]\n", + " [3. 4. 4. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 84%|████████▍ | 845/1000 [00:00<00:00, 1694.10it/s]\n", + " Mean performance on train set: 0.633333\n", + "With standard deviation: 0.015793\n", + "\n", + " Mean performance on test set: 0.640000\n", + "With standard deviation: 0.111355\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1694.34it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 50.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 126.36it/s]\n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14863.89it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.7526798248291016 seconds ---\n", + "[[3. 3. 3. ... 0. 0. 0.]\n", + " [3. 4. 4. ... 0. 0. 
0.]\n", + " [3. 4. 4. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 84%|████████▍ | 842/1000 [00:00<00:00, 1670.86it/s]\n", + " Mean performance on train set: 0.658333\n", + "With standard deviation: 0.034524\n", + "\n", + " Mean performance on test set: 0.670000\n", + "With standard deviation: 0.090000\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1665.11it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 60.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 107.33it/s]\n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 13937.03it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.8846912384033203 seconds ---\n", + "[[3. 3. 3. ... 0. 0. 0.]\n", + " [3. 4. 4. ... 0. 0. 0.]\n", + " [3. 4. 4. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 83%|████████▎ | 829/1000 [00:00<00:00, 1653.86it/s]\n", + " Mean performance on train set: 0.671429\n", + "With standard deviation: 0.036577\n", + "\n", + " Mean performance on test set: 0.680000\n", + "With standard deviation: 0.107703\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1673.57it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 70.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 81.45it/s] \n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14275.64it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1631414890289307 seconds ---\n", + "[[3. 3. 3. ... 3. 3. 3.]\n", + " [3. 4. 4. ... 4. 4. 4.]\n", + " [3. 4. 4. ... 4. 4. 4.]\n", + " ...\n", + " [3. 4. 4. ... 7. 7. 7.]\n", + " [3. 4. 4. ... 7. 7. 7.]\n", + " [3. 4. 4. ... 7. 7. 7.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 88%|████████▊ | 876/1000 [00:00<00:00, 1761.00it/s]\n", + " Mean performance on train set: 0.666667\n", + "With standard deviation: 0.038021\n", + "\n", + " Mean performance on test set: 0.670000\n", + "With standard deviation: 0.100499\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1754.20it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 80.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 79.93it/s] \n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14789.73it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1846554279327393 seconds ---\n", + "[[3. 3. 3. ... 3. 3. 3.]\n", + " [3. 4. 4. ... 4. 4. 
4.]\n", + " [3. 4. 4. ... 4. 4. 4.]\n", + " ...\n", + " [3. 4. 4. ... 7. 7. 7.]\n", + " [3. 4. 4. ... 7. 7. 7.]\n", + " [3. 4. 4. ... 7. 7. 7.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 93%|█████████▎| 926/1000 [00:00<00:00, 1854.59it/s]\n", + " Mean performance on train set: 0.709524\n", + "With standard deviation: 0.058853\n", + "\n", + " Mean performance on test set: 0.780000\n", + "With standard deviation: 0.107703\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1844.77it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 90.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 83.75it/s] \n", + "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14169.95it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1314406394958496 seconds ---\n", + "[[3. 3. 3. ... 3. 3. 3.]\n", + " [3. 4. 4. ... 4. 4. 4.]\n", + " [3. 4. 4. ... 4. 4. 4.]\n", + " ...\n", + " [3. 4. 4. ... 7. 7. 7.]\n", + " [3. 4. 4. ... 7. 7. 7.]\n", + " [3. 4. 4. ... 7. 7. 
7.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▍| 943/1000 [00:00<00:00, 1878.69it/s]\n", + " Mean performance on train set: 0.709524\n", + "With standard deviation: 0.058853\n", + "\n", + " Mean performance on test set: 0.780000\n", + "With standard deviation: 0.107703\n", + "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1872.55it/s]\n", + "\n", + "\n", + " cycle_bound accur_test std_test accur_train std_train k_time\n", + "------------- ------------ ---------- ------------- ----------- --------\n", + " 0 0.61 0.113578 0.629762 0.0135212 0.541142\n", + " 10 0.61 0.113578 0.629762 0.0135212 0.577072\n", + " 20 0.61 0.113578 0.629762 0.0135212 0.576852\n", + " 30 0.64 0.111355 0.633333 0.0157935 0.556008\n", + " 40 0.64 0.111355 0.633333 0.0157935 0.596335\n", + " 50 0.67 0.09 0.658333 0.0345238 0.75268\n", + " 60 0.68 0.107703 0.671429 0.0365769 0.884691\n", + " 70 0.67 0.100499 0.666667 0.0380208 1.16314\n", + " 80 0.78 0.107703 0.709524 0.0588534 1.18466\n", + " 90 0.78 0.107703 0.709524 0.0588534 1.13144\n" + ] + } + ], + "source": [ + "# PAH dataset (node and edge unlabeled, undirected, cyclic, classification)\n", + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/PAH/dataset.ds'\n", + "kernel_file_path = 'kernelmatrices_cyclicpattern_pah/'\n", + "\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = False)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 90, 10), normalize = False, \\\n", + " model_type = 'classification')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "# results\n", + "\n", + "# MAO dataset\n", + "cycle_bound accur_test std_test accur_train std_train k_time\n", + "------------- ------------ ---------- ------------- ----------- --------\n", + " 0 0.642857 0.146385 0.54918 0.0167983 0.187052\n", + " 50 0.871429 0.1 0.698361 0.116889 0.300629\n", + " 100 0.9 0.111575 0.732787 0.0826366 0.309837\n", + " 150 0.9 0.111575 0.732787 0.0826366 0.31808\n", + " 200 0.9 0.111575 0.732787 0.0826366 0.317575\n", + " \n", + "# PAH dataset\n", + " cycle_bound accur_test std_test accur_train std_train k_time\n", + "------------- ------------ ---------- ------------- ----------- --------\n", + " 0 0.61 0.113578 0.629762 0.0135212 0.521801\n", + " 10 0.61 0.113578 0.629762 0.0135212 0.52589\n", + " 20 0.61 0.113578 0.629762 0.0135212 0.548528\n", + " 30 0.64 0.111355 0.633333 0.0157935 0.535311\n", + " 40 0.64 0.111355 0.633333 0.0157935 0.61764\n", + " 50 0.67 0.09 0.658333 0.0345238 0.733868\n", + " 60 0.68 0.107703 0.671429 0.0365769 0.871147\n", + " 70 0.67 0.100499 0.666667 0.0380208 1.12625\n", + " 80 0.78 0.107703 0.709524 0.0588534 1.19828\n", + " 90 0.78 0.107703 0.709524 0.0588534 1.21182" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " #--- calculating kernel matrix when cycle_bound = 1000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "load SDF: 100%|██████████| 4457424/4457424 [00:10<00:00, 408299.51it/s]\n", + "ajust data: 100%|██████████| 42687/42687 [00:10<00:00, 4092.17it/s] \n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 42682/42682 [19:36<00:00, 36.27it/s]\n", + "calculate kernels: 100%|██████████| 42682/42682 [37:05<00:00, 19.18it/s] \n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 
42682 built in 3402.171978712082 seconds ---\n", + "[[ 9. 9. 3. ... 4. 3. 4.]\n", + " [ 9. 11. 5. ... 6. 5. 6.]\n", + " [ 3. 5. 16. ... 6. 6. 6.]\n", + " ...\n", + " [ 4. 6. 6. ... 30. 29. 6.]\n", + " [ 3. 5. 6. ... 29. 29. 6.]\n", + " [ 4. 6. 6. ... 6. 6. 11.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 7%|▋ | 70/1000 [1:34:57<227:25:45, 880.37s/it]" + ] + } + ], + "source": [ + "# NCI-HIV dataset (labeled?, directed?, cyclic, classification)\n", + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf'\n", + "datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt'\n", + "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 1000, 21), normalize = False, \\\n", + " datafile_y = datafile_y, model_type = 'classification')\n", + "\n", + "# kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n", + "\n", + "# kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para,\n", + "# normalize = False, datafile_y = datafile_y, model_type = 'classification')\n", + "\n", + "# kernel_para['k_func'] = 'minmax'\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = True)\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = 
False)\n", + "# # kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)\n", + "\n", + "# kernel_para['depth'] = 10\n", + "# %lprun -f untildpathkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The line_profiler extension is already loaded. To reload it, use:\n", + " %reload_ext line_profiler\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD8CAYAAABzTgP2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzsnXl4FEXawH9FICRBjgSC3AQ5PLkk4KIrsggKiGA4VhSRQz41K+KFB7ouqLgcsuIBIq4HsiKKoBGRQ0UWlRUICAKiyH0q4ZIrCTnm/f6ontCZzEwmyVxJ6vc89aS7urq7pidTb9db76FEBIPBYDAYnFQIdQcMBoPBEF4YwWAwGAyGfBjBYDAYDIZ8GMFgMBgMhnwYwWAwGAyGfBjBYDAYDIZ8GMFgMBgMhnwYwWAwGAyGfBjBYDAYDIZ8VAx1B4pDrVq1JCEhIdTdMBgMhlLF+vXrj4pIfGHtSqVgSEhIYN26daHuhsFgMJQqlFJ7fWlnVEkGg8FgyIcRDAaDwWDIhxEMBoPBYMiHX9YYlFJvA72ANBG5ws1xBbwM9ATSgaEi8oN1bAjwd6vpeBF51x99MhjKNGlpMGsWbNoEJ09C9erQqhUMGwbxha4tGgxeUf7Ix6CU6gScAWZ7EAw9gfvRguEq4GURuUopFQesAxIBAdYD7UTkhLf7JSYmill8NpRLUlNhwgRYskTvZ2aePxYdDSLQoweMGQPt24emj4awRSm1XkQSC2vnF1WSiHwDHPfSpA9aaIiIrAZqKKXqAjcCX4rIcUsYfAl090efDIYyx4wZ0LkzpKRogWAXCgAZGbouJUW3mzEjFL00lAGCtcZQH9hv2z9g1XmqL4BS6m6l1Dql1LojR44ErKMGQ1gyYwaMHg3p6XpW4A0R3W70aCMcDMWi1Cw+i8gbIpIoIonxRodqKE+kpnLukUe4Kz2dxkBVoA1gKZOYA1xgKzGAAtY7hYNRuxqKSLAEw0GgoW2/gVXnqd5gMDiZMIGcjAwaAiuBk8B44K/AHmAQeoHPWV4DLgKuBK1emjAhBJ02lGaCJRgWAncqzZ+AkyLyG7AMuEEpFauUigVusOoMBgNo66MlS6gCjAMS0D/aXkATtLWGK+8Cd6JnDYjA4sVg1K+GIuAvc9W5QGegllLqADAWqAQgIq8Di9EWSTvQ5qrDrGPHlVLPAanWpZ4VEW+L2AZD+WLWLLfVh4Ffgctd6vcC3wBv2yuV0td59FH/98+JMZ8tU/hFMI
jIbYUcF+A+D8fexuX/2GAwWGzaVMD6KButPhoCXOLSfDZwLXo2kUdGBmzeHJj+eTOf/fhjGDvWmM+WQkplED2Dodxw8mS+XQcwGIgEprlpPht40t11Tnh1DSoeTkupjAz3llIZGfpvSgosWwZTpkBysv/7UZYIk5mXEQwGQzhTvXrepgB3odVIi7F0tTZWAYeA/u6uExvr337ZzWcLw24+C0Y4uCPMZl6lxlzVYCiXtGoFUVEAJAM/A58B0W6avgv0Q5uz5iM6Glq29F+fUlNh9GjuSE+nLlANaAG8aWuyHK3migH+gl77wJjPuicMHRf9EhIj2JiQGIZyQ1oaNG7M3sxMEoDK5J/mz0SvN2QCdYAFwPWu14iKgn37/KeK6NsXUlL4SYRmVp9+QVuffA40BpqiBcXNwNPAt8Bq0AvhSUmwYIH7a4eJKiVoFGXm5SQmpthqOV9DYiAipa60a9dODIZyQ1KSiFIiWilTtKKUSN++/uvL4cMiUVEF7vMLSB2QD0FmgnS0HTsDEgXys7MuKkokLS3/ddeu1Z8zKqrg9aOjdV1Skm5XVli7VjKjo2U4SCOQC0Bagyy2ffYPQS6xjl0K8onzWEyMSGpqkW8JrBMfxlijSjIYwp0xY7Q6qDhER+vz/YWL+ezf0OqiS4C6aJv0n4DWtjZV0DOIn5wVTvNZJ2GoSgkKhTguHgTuAF4ETgEvALcDaRBwx0UjGAyGcKd9e606iIkp0mmO6Gh9XmLhmgOfcTGffQ04jVYV9UWrlc4A1V1Oq261A/Kbz5bXGFA+OC4eAGoAPdDOijehhexOCLjjohEMBkNpIDn5vHBQyntbpciuVImJtWpx9s47/dsPF/NZgAjgz+iBbAY6XtMplzancFkUP3EibxF7Wno6iWihMtTWpEzHgPLBcTERuBQdNiIXSEE/o1bOxq4zLz9iBIPBUFpIToaVK/XibVRUQfVSdLSuT0qi4qpV/NqlC4MHD8bhcPivD9Vd5wLnyUG/zV4O/GirP2urdyI1amhVSEYG9dCZuoa7XK9Mx4DywXExAh3a5Ha0QLgdbWxQxXlCAB0XjR+DwVCaSEzUFj1Hjui3xc2b9dt3bKw2SR06FOLjUcDMmTPp1q0bTz31FBPsg2hJLH9atYIFC0jLzORrtOojGvgKmGuVjsCjaAupm4Bn0W+5Ti/tDKWY/vHHjDp3jkgR+lr169CzDk94jAFVGq2VfHBc/Ap4DPgvWhiuB3qjo+q2cZ4YCMdFMFZJBkNZ5siRI3LRRRfJO++84x/LH8sqKQ2kE0h1kKogV4C8YbvelyAXW9ZI14Hstt8rKkqO3X+/ZFesmK8PT4EM8WBdtQekAsgu1z5Pnhy0Z+lXBg3K+xwOkKEgnUHSbZ/vBZBbXJ5DH6s+r27w4CLdFh+tksyMwWAow9SqVYtFixbxdocO5GZnE5GVVbLwFbVrQ48exKeksNLLYnFXtG9DAZSCnj2JO34ccnJ8/hxBjwEVQHJycvglIoLmFSpQ2eHIc1z8ivyOi+2BicBG9AxhA3qR/2/OBv52XLRh1hgMhjLO8nHjWHH2LDHnzjHUNphvRS9wxlqlK7DVF8ufEpjPSnQ0x++5h+N79hTpvNlo3XsBAqVKSUuDyZPhjjvg5pv138mTS2QFdPjwYf75z39y0UUX8fjWrVSMiGAvet1gI9pB0bnQPge4Dm2x1B+9cN8PHQfrBucFRbTqMBD4Mq0It2JUSQaDj6xdKwsiI+UTkHtdVDUnLBWPAyQH5GWQlnY1hTcnqtde08eL4GyXrpSMioyUuLg4WVyzZoHjnlRJ34HEgJxycyx30CC/Py9/Oto5HA757rvv5Pbbb5caNWrIiBEjZP369fpgCBwX8VGVFPJBvjjFCAaDwUdsg483HX42yDSQaB8Hn/T0dDn49NOSXbmy5BYyuOUqJdmRkbLniSfkxIkT+gKTJuUNvNkgGSBPgNxhbWfbzv8/kMEeBM3fo6KkT58+8uqrr8rWrV
vF4XAU/1k5hV1hg7VSut1rr3m81JkzZ2TmzJnSunVrad68uUydOlWOHz+ev9HatUUWrj4JbS8EVTAA3YFt6EQ8T7g5PtWaLW1Em+n+YTuWazu20Jf7GcFgMPiAS/gKT4KhOkgEiAJ5znVQj4yUj2fOlPHjx8uwYcOkU6dOUr9+falcubJcfPHFMurqq2VD06aSXbGiZEdG5r+28w27b9+Cg5itb2NBcCljrWtkWP37yt3gGBUlv2/eLHPmzJHhw4dLo0aNpF69enLHHXfIrFmzZP/+/b4/q2LMgNwJh59//llGjRolcXFx0qdPH/niiy8kNzc34Pf1laAJBrS57U60iXEk2oT5Mi/t7wfetu2fKeo9jWAwGHzA9lZe2IzhDMh0kEVu3spnt2wpjz/+uLzxxhuyfPly2bNnj+Tk5OS/V1qathAaPFikVy/9d/LkgjGR7PhZleJwOGT79u3y+uuvy4ABA6RmzZrSokULSU5Olvnz58uxY8fc98N6c38VpB1IpJfn9IwluL60DdLZ338vCxYskOuvv15q164tTz75pOzdu9f378mPM5XCCKZg6Agss+2PAcZ4af8/oJtt3wgGgyEQ2EwiCxMMApILEgdy2PVYEU0ifSbAqpTc3FzZsGGDTJkyRbp37y5Vq1aVdu3ayWOPPSbLli2Ts2fP6oaWgFoAbtdinGUH2iy3rk0w5Coln0dFyTXXXCNz5syRzMzM4j2L1FQt6KKi9EzLfm9vM68i4qtg8Ie5an1gv23/AHCVu4ZKqcZoi7OvbdVRSql1aMfJiSKS4oc+GQwGN+ErvOFAJ2Q/CNS2HwiU5Y8zBlRxw04XEgOqQoUKtGnThjZt2vDII4+QlZXFmjVrWL58Oc8++yw//vgjXVu14qO1a6kohTva3QdMwmYuClQQobvDQc9PPimZo52PjovBIth+DAOB+SKSa6trLCIHlVIXAV8rpTaLyE7XE5VSdwN3AzRq1Cg4vTUYSjNW+Iocq+RaJRP9w18B1EJ7JZ9Fh6WIRcfnyYe/s7/ZcfpKeEsR6kQpbSZbzFwEkZGRXHvttVx77bWMGzeO06dPc+CBB3CsWVPouR+hw1L0dHOsQkSEHswffbTIfSpAfLx/rlNC/OHHcBBoaNtvYNW5YyDaaz4PETlo/d2F9v5u6+5EEXlDRBJFJDG+NLrAGwzBxsr+Nh7tODUReM/aHg/8AdyGjnzaFL1QuBSIsl8jgE5UeRQhBhQrV/otNWjVqlW5NCuLyNxcr+1Oo/0HXvbUoJQ62nnDHzOGVKC5UqoJWiAMRMd7yodS6hL0C8n3trpYIF1EzimlagHXAJP90CeDwTB0KIwdyzi0o5Q7BhR2DZHAOVHZCZUqxQd12zh0HKMEb40CpW4LESUWDCKSo5QaCSxDWyi9LSI/KaWeRS90LLSaDgQ+sBZAnFwKzFRKOdCzl4kisrWkfTIYDOSFryAlxbuKxhNW+IqgBqkLtirFS7RYJ8vRaw6vWftH0Ml0HrcKEFh1WwjwyxqDiCwGFrvU/cNlf5yb8/4HBHieajCUY8aM0bGPirK468Tf2d/CEStaLJmZHtdilqNDYjtpj86q1sNZEQx1W5AxsZIMhrJMMbO/+Wr5U+qxqck8rcXURMcxcpYItE78AueJwVK3BREjGAyGso6V/U1iYvC+zIpWHzmFgp8WecMap7pNKcZBARfscW5O2YMOOAiERt0WBIxgMBjKA8nJfP2Pf7CqVq2gWv6UCkoQLbasqttMPgaDoZzw4jffcOu//kWnHj3CwokqbAiwo11pRElxrBVCTGJioqwrzYnADYYgc+jQIS6//HIOHDhAlSpVCj+hHJL9yivkPPggldEezR4poaNdKFFKrReRQiWZmTEYDOWA2bNn069fPyMUvPD88eOcve46XoiL0/mklTqf2Q60MBDRawpjxpTJmYITIxgMhjKOiPDOO+/wzjvvhLorYcv27duZNm0aGzZsgIYNwyZmUagwgsFgKON8//
33KKXo2LFjqLsSlogI9913H0888QQNG1rRfcIkZlGoMILBYCjjvP322wwbNgylVKi7EpbMmzeP3377jQceeCB4N01L0zOSTZt0WI7q1bWz3bBhYTEjMYLBYCgruBlszl1yCf/96COe/fnnUPcuLDl16hQPP/ww8+bNo1KlSoG/YWoqTJgAS5bo/czM88c+/hjGjtV+FWPGaGupEGGskgyG0o6XwSanUiUcublE9ukT8sEmHHnggQc4c+YMb731VuBvNmNGUMKLe8NYJRkM5YFCBpuK2VaUn5QUHTOpFJpYlggvKpsf9u/ngw8+4Keffgp8P5zfky9+EiK63ejRej8U35cvad7CrZjUngaDBD2RfKli7VqdsjMqKl/ea2eqTEdUlHwdGyspTz0VlL5kRkfLcJBGIBeAtAZZbPVntxWBo4qtPFuEFKZFAR9Te5qQGAZDaSQ11e0b6B50lrFYdMC3keiIoXk430TLsip2xgzo3FnPkjIz8+vxATIyUJmZdDpxgt5Tp+r2gWTCBHIyMmgIrAROooPz/RX9fTn5AzhjladtfWXChMD2zw1GMBgMpZEJE/I7X1n8DZ2v+TdgI3oges21UYgGm6BgV9lYqrWfgS7oTHXNgE+sphGAcgrKQAmHtDRYsoQq6IB8CehBtxfQBFhf2Pki2tnuyJHA9M8DRjAYDKUNa7Bxt6awG/0mGoWeMXQHCmjQQzTYBBw3s6gcoA96ID4OvAHcAfxqPy+Qs6hZs9xWH7b6cLmtrjE6L/Iw4Ki9sVIerxMo/CIYlFLdlVLblFI7lFJPuDk+VCl1RCm10SojbMeGKKW2W2WIP/pjMJRpvAwSDwIfAOnoPLtL0MKhACEYbAKOm1nUL8Ah4CH0DKELOn/wf1zPDdQsatOmAqqsbGAQMAS4BKiFzo+8Fz2DOG0dz9e3IOeULrFVklIqApgOdENnwEtVSi2Ugik6PxSRkS7nxgFjgUT0Asx669yylUDVYPAnbgYbJ53Qb8XV0FnIhgC3uGtY1hLYe5lFuSLAlgKVtlmUGwczh8PByZMnOXHiBCdOnOD48eNut133Zxw4cD7TG+BA54+OBKZZdRegB0CAC636umgBUdV5YpBzSvvDXLUDsENEdgEopT5Az958yd18I/CliBy3zv0S/YIz1w/9MhjKJh4S2DvQP567gf+hFzGHo/MST3Z3QllKYO9h9nMxes3lBfSsYQV63eUvbtpm5eSw8Oab+ahx4wKD/alTp7jggguIjY0lLi6O2NjYvBIXF0fNmjVp1qxZgWP1n3gC5s0DtEC6C61GWgx4cqdz+qc77JVBzintD8FQH9hv2z8AXOWmXT+lVCe0au0hEdnv4dz67m6ilLob/T9Po0aN/NBtg6EYhDiUQVZWFn+cO0dtN8eOA/vQlkiVrTIM+DseBENZSmDvYRZVCUgB7gcmod/M/4p+Nq5E5uTQUoScpKR8A3xcXBzVq1cnIiKi6P1q1w4WLoTMTJLRC+FfodOGOlkD1ACaAyeAUUBn9GI5EJKc0sFycPsMmCsi55RS9wDvotV9PiMib6BnySQmJpY+d21D6SZEoQxyc3PZuHEjX3/9NV9//TWrVq3iuWrV+FtEBJVy8yfqrIW2dJkBjEbPGN4FWrm7cFlLYO9hFgX686+07V+NVrG54+Latbl44ED/9WvoUBg7lr3ATLRAqmM7PBO90PskkIZWAXbDRWUSgpzS/lh8Pgg0tO03sOryEJFjInLO2n0TaOfruQZDyPHBLp7MTH28c+cSmT6KCFu3bmXatGkkJSURHx/PnXfeyb59+7j77rvZs2cPD/zwg8e4Ph8DS4F4tGlmJWCq+xuVrQT21at7PLQJyEQvyE9Bm/IO9dTY37MoK6d0Y6UQqx9nbGUQcBvamuys1bfZ2IRHqHJK++IF562gZx270C8rkcCPwOUuberatpOA1dZ2HPqZxFplNxBX2D2N57MhaATBu3jXrl3y73//W2677Ta58MILpUmTJn
LXXXfJnDlz5NChQ+5PSkoSUapo/XIWpUT69vXTAwoTJk2S3MhIt593NEgNy6O4O8h2T88lOlpk8mT/923t2qL/D4XY87nEgkHfi57otYOdwFNW3bNAb2t7Atqc+kf0+s8ltnOHAzusMsyX+xnBYAgK1g/6VZB2IJEgQ2w/2nMg/UAaWyENVvj4gz548KC89957Mnz4cElISJA6derI7bffLm+99Zbs2rWrSH0rzmCTXbmyXwebUOJwOGTFihUy4LrrJKM4A6+9REWJpKUFpqNhEr4kqIIh2MUIBkNQsN7KF4B8AnKvG8EwFeRbkDqugsH2Vn7s2DFZsGCB3HfffXLppZdKbGysJCUlybRp02Tr1q3icDiK179iDDY5UVHyaNWqMmvWLD8+qODjcDjk888/l6uvvlqaNWsmb775puT07h3esyjn91VYH5UKWEwrIxgMhpJw+HCB4GtPuQgGe6nvKhhAsiIi5C9XXCFVq1aVHj16yAsvvCDr16+XnJwc//WzGIPNzz//LAkJCfL8888XXyiFiJycHJk3b560adNGWrZsKXPnzj3/PMNIZeOR1FQtgKKitOrK3ofoaF3ft2/A+mIEg8FQEiZNKrlgqFhRdv3tb3Lu3LnA9rUYg83BgweldevWkpyc7F9BFSCysrJk1qxZcvHFF0uHDh3k008/ldzc3IINw0RlUyhpaXo9Y/BgkV699N/JkwOnyrLwVTCYfAwGgzu8eBf7SqWcHJqcPg2RkX7qlAcSE2HBgiIlsK9Xrx7ffPMNffv2pX///rz//vtER0e7vXwoyczM5O2332by5MlcdNFFTJ8+nS5dunhOU+rMXRDihDiFEuY5pY1gMBjc4cUuvkgE07u4iINNtWrVWLx4McOHD6dr164sXLiQmjVrBrCDvnP69GlmzpzJiy++SLt27Zg7dy4dO3b07eTkZO1LMmGCDnOhVP4YStHRWmD07Kn9ThILTWhW7jCCwWBwhxe7+CIR5t7FkZGRzJ49mzFjxnDNNdewdOlSEhISQtaf48eP8+qrrzJt2jS6dOnCkiVLaN26ddEvVIxZlOE8RjAYDO5o1UoPLJmZ5KDDN+daJRP9w6kInAOcyoos61hlrHg3pcS7uEKFCkyaNIn69evz5z//mUWLFtGmTZug9uH3339n6tSpvPnmm/Tp04dVq1bRokWLkl84zFU24YrJx2AwuMPmFTweHdtmIvCetT3eOnaxtX8QHREyGh0+GdDqilLkXTxq1CheeuklbrjhBpYvXx6Ue+7du5eRI0dy2WWXcfbsWX744Qfefvtt/wgFQ7ExMwaDwR1WKANJSWGcCOM8NNvj6fxQhTIoIf3796d27doMGDCAF198kUGDBhVs5IdAgtu2bWPixIksXLiQESNGsHXrVurUqVP4iYbg4IvpUrgVY65qCAZf/vOfcrY4NvHBtIsPEFu2bJFGjRrJpEmTzvs6rF2rnf6iogqY8uaZxSYl6XYe2Lhxo/z1r3+VWrVqyTPPPCPHjh0L0icyiPhurhryQb44xQgGQyBxOBzyzDPPSKNGjWTvmDGlwy4+AOzfv1+uuOIKGTVqlOROm1Yir93//e9/ctNNN0ndunXlhRdekFOnToXoU5VvfBUMRpVkMNjIyMhg+PDh7N69mzVr1mj1RsOG4W8XHwAaNGjAt99+y9sdOpA1fTpRtjDfnYHVnNdF1we2gX4+Vg5lAb5u0YLnn3+eXbt28fjjjzN//nyioqKC/EkMRcUsPhsMFr/99hvXXXcdSilWrFhxXuednAwrV0JSEkRF6cHfTnS0rk9K0u3KgFBwUmP7dh46eDCfUHAyjfPho7e5HkxPJ3PkSGbcdRdDhgxh+/btJCcnG6FQSjAzBoMB+OGHH7jlllu45557ePLJJwt61pZXu/gJE1B257AiUFmEeVdeSYUhQ/zcKUOgUeJtahymJCYmyrp160LdDUMZYcGCBdx77728/vrr9OvXL9TdCR/S0qBxY7ehQTqj4+
gL2mT3eauuAFFRsG9f2RSapRCl1HoRKdTV28wYDGWLIphSigj//Oc/mTlzJsuWLePKK68MTZ/DlVmzPB6aBFyGzsz1AXAzsBFo6tpQKX0d42RWqvCLYFBKdQdeBiKAN0Vkosvxh4ERaAfSI8BwEdlrHcsFNltN94lIb3/0yVDOKGJO5oyMDEaMGMH27dtZs2YNdevWDU2/wxkvgQSvsm0PQecoXgzc79owI0Or3QylihILBqVUBDAdncP6AJCqlFooIlttzTYAiSKSrpRKBiYDt1rHMkQkuP73hrLFjBnerYacOvKUFFi2jJP/+Ac3fvIJCQkJrFy5MiyjioYFRQgkqDgfGqQAwQwkaPAL/rBK6gDsEJFdIpKFnln2sTcQkRUikm7trgYa+OG+BsN5oZCe7t2UFPJMKSuNGcPfa9Vi7ty5Rih4w0MgwT+AZei4UDnAHOAboLun64R5IEFDQfwhGOoD+237B6w6T9wFLLHtRyml1imlViulbvFDfwzlhdTU80LBhQ+AS4EqaL33t7ZjMSL0WrECtX59cPpZWmnVSi8eu5AN/B2IB2oBrwIpgNvoRqUkkKAhP0H1Y1BK3QEkAi/Yqhtbq+S3Ay8ppQqsX1nn3m0JkHVHjhwJQm8NYc+ECfnj7Ft8CTwOvAOcRr/NXuTaKCNDn2/wjIcAgPFAKvrZ/oFWAXTzdA0pXYEEDRp/CIaDQEPbfgOrLh9Kqa7AU0BvETnnrBeRg9bfXcB/gbbubiIib4hIoogkxhvTN0Naml5odqM+Ggv8A/gT+h+8Pm6msCI6iYt5yfCMFUgQT9nSCqOUBhI0+EcwpALNlVJNlFKRwEBgob2BUqotMBMtFNJs9bFKqcrWdi3gGsC+aG0wuMeDKWUusA5t+tYM/ZYyEnDrouU0pTR4ZsyYgp7evhIdrc83lDpKLBhEJAf921sG/AzME5GflFLPKqWcpqcvABcAHymlNiqlnILjUmCdUupHYAUw0cWayWBwjwdTysNoHfh89LrCRrRJ3PgCLTGmlL7Qvj0yZQqZERFFOy8mRseMMmkzSyV+8WMQkcVoM2Z73T9s2109nPc/wKxMGYqOB1NK57vt/YDTM+FhtGB43t0JxpSyUN6NjuZAnTo8deKEDo9RjgIJlldMED1D6cSDKWUsWn1k14p71ZAbU0qv7N+/n0cffZSbP/8cVU4DCZZHTEgMQ+nElpPZlWFoE8ruQCVgKtDLzSUygFeXLuX3hx+mR48edOrUicqVKwemv37IehZsRIS77rqLBx54gNatW+vK8hhIsBxigugZSiVbvv6aFt26EelwFDiWDTwAvA9EAX9Fu9q7WuRLVBQ/LlzIZ6tXs2TJErZs2cJ1111Hjx496NGjB02aNCl5R72F6oiO1moZW6iOcOL111/n7bff5n//+x8VK5p3yLKAr0H0Qp6NrTjFZHArv2zevFn69esnderUke0tW4qjsIxi3jKN9e2b79pHjx6VuXPnyp133im1a9eWSy65RB566CH54osvJDMzs+idfe21EmU9CyU7d+6UmjVrytatW0PdFYMfwaT2NJQltm7dKrfeeqtceOGFMmXKFDl79qzOLVzUtJs+5mTOzc2V1NRUefbZZ6Vjx45StWpV6dWrl0yfPl127dpVeIedQqEUpgTNzc2VTp06yQsvvBDqrhj8jBEMhjLBtm3bZNCgQRIfHy8TJ06U06dP528QpAH42LFj+WYTF198sTz44IPuZxOFCKxfQSqDDCqGwAoGU6dOlWuuuUZycnJC2g+D//FVMJg1BkNYsmvXLp577jkWLVrEgw8+yP3330+1atXcNy4suqoTP5lSOhwONmzYwOLFi93TIcq3AAAgAElEQVSvTTzyiI7k6qEvN6AXvhsD77nrY1KSXuT1BT8vam/bto1rrrmG1atX06xZsyKfH8i+GUqOWWMwlEp2794tI0aMkJo1a8rYsWPlxIkTvp2YmqrXDKKiRKKj87+FR0fr+r59A/I2bp
9NXFarlmR6WVOYCzIAZKynGQPovqaleb/p2rUiSUm6bVSU+8+blKTb+Uh2drZcddVVMm3atJI9kAD0zeAfMKokQ2li3759cu+990pcXJw89dRTcuzYMX3g8GGRSZNEBg0S6dVL/500yfPAmZYmMnmyyODBuv3gwXq/sIHWT+ROnCi5lSu7HfBPgjQH2V+YYIiO1n32RIAWtSdMmCBdunSR3Nzc4j+AUrzgXh4wgsEQeIo6aLvh4MGDMnLkSImLi5PHH39cjhw5og+U1rfOQYM8DoajQCZa214FA2iB5g6XNZUqLqUCyMhirKls2rRJatWqJXv27Cn+Zy/FC+7lBSMYDIHDD4P2b7/9Jg8++KDExsbKI488IocPHz5/sDS/dfbq5bavG0AuAznnq2Do1avgtQtZ1D5tCYeVngZgD2q0rKwsadu2rbz55pvF/9xr10pmdLQMB2kEcgFIa5DFbvryDAggX/rQN4N/MYLBEBhKOGinpaXJ6NGjJTY2Vh544AE5dOiQ++uH+q2zuLMhDzOGqSAxIBdapQpIFEhbD59pdYsW8p///Ee2b98uDodDXzspyetznwXSBMTh6ftw8dtwMnbsWOnZs+f5+xSHpCQ5Ywm83SC5IJ9ZAmK3rR87QK4AqWsXDF76ZvAvRjAY/E8JBu2jR4/KE088IXFxcXLffffJgQMHCl7fh7fOsyDJIDVBqoFc6++3zpLOhiZNKnie1e/fbOURkH4gaW6eWU7lyvLfXr1kwIAB0rBhQ6lVq5YM6tZNsitW9Pqs/2INzB7buFnUXrduncTHx8vBgweL/8wOH3b7mQWkJch82/6NIJ+DNLYLBg99M/gfIxgM/sVSY7wK0g4kEmSIyyDwFcjFINEgnUH2WPXnKlWSLtWqyT333CN79+71fA8f3joHgdxqDag5IOv8+dbpDxWWl0HSXsbiu1XSgQMH5Mc77pCsiAiP19uDXl/Y5e2+LovamZmZcvnll8t7771XsufmQRj+jvbX+NnanwfS29ouIBgKW3A3+AVfBYMJgGLwDSuNZj10vt9l5E9+cxToC7wJ3Aw8DdyKTvtYMTubhV26UOX11z1f38rIVgUYZ6vuBTQB1qOTzy9EJxV3ejS0czYUOZ+RrTg28k5fCDf5owsgotuNHq337T4RzqxnXvwYIP9ntJMLbLzwQhIqVKCmVVe/fn3qi0Bursfr/Qf4M/pZeSQjg+UvvcTL335LxYoV2bp1K2fPnmXZsmUsX76cihUrFqt0+eQTmrkEM8wGBgFDgEvQaUCfRKdd9dQ3kxsjfPCLYFBKdQdeBiKAN0VkosvxysBs9O/4GHCriOyxjo0B7kL/JkaJyDJ/9MngR2xpNPtaVevQA7STj4HLgQHW/jh0ovhf0ANDlZUrvQ/aHjKpHQZ+ta69Fu0UNhY9ENa17tPP2diZke3RR4v2+VJTYfRopqWnMwvYDNwGOHu0Gi3o1qP/wTsDrwB1ncKhffv8CWnGjIFly3wTMi5UiI5madu2vNiiBbfddhv169dn165dDFu6lKu9nDcbeMKH61/RoAEjRozgp59+4uuvv2bChAlUqVKFnJycQktmZqbb+g4HDuS7hwMYDEQC06y6cVZdgrfOmdwY4YMv0wpvBf1b2YnOtx4J/Ahc5tLmb8Dr1vZA4ENr+zKrfWX0y85OIKKwexpVUpBxoyp4ykWVNArkXhdVwuV2/XJhqgI3i7ZZINeD3G3tP4+2ZhmLtu75L3oRd6v9PE9mnt6wFnUXgHxifQ77Z1tsqUFOotcKhqF15V5VWMVYj0mvUEEejomRCy64QJo1aybVq1eXhg0byrhx4+Rgly4ez1uFXtg+5ct9Bg+Ws2fPSvPmzeWjjz4q+rNyh+27c4AMRasS0233bY1eF3IuvlcAieW8+W6xvztDkcBHVZI/EvV0AHaIyC4RyQI+APq4tOkDvGttzweuV0opq/4DETknIruBHdb1DOGEhzSads4ArqlzqqNVCEDhqgKXjG
zu3jqj0fkV/m7VXwf8BfjCdt7xXbvYs2cPmYX0Nw+X2dAtkKfCcdIDPROqBsSg89iuch60q7BsZAwdyqFHHiGncmUcymuqIBxKkRMZyaGHH+bx3bs5deoU27dv5+jRozz88MO8+uqrbKlQAYlyDRyueRetxqta2GeNioKWLXnyySdJTEykf//+hZ3hG61a6WsDyej8vp9xPpsewHJgCzrV6kagHjoJ/H3OBtHROqeDISzwhyqpPrDftn8AuMpTGxHJUUqdRP/+6qNn6vZz6/uhTwZ/4iGNpp0LgFMudadwGay8qQpsGdkErVs8jM4XW8mqb+XmNNchd/Uvv3Bvp04cPnyYmJgY6tatS506dahTp07etr2uyfz5xLi5jje+Qau2nOQ4HKy44w7er1ePnTt3snPnTo4dO0bjxo3p3q4dw37/nSv27YMKFaiYlXX+RCsfQ4WePakwZgxNXfIjV6xYkQcffJD+/fvz9D330OncuQI5JUAPsD6RmcnRBQv4dedO3tu2rQifuBCGDoWxY9lr9aUyUMelf4NcTolAZ9u7wFkhoq9jCAtKzeKzUupu4G6ARo0ahbg35QwPaTTtXM75KSHAWbRe0D6Aek2jacvI5nzr/Ir8b52dgEbABGAMsAZYgU7CA0B0ND0ff5x9jz6KiHD8+HF+//13fvvtN37//fe87Y0bN+bVjduxg79mZxf6+ZxsAp4FPrXVVczKoubBg1wzYAB33nknTZs2pX79+kRERJxvVIKsZw0aNOCdzz/nt44dqb16NRFeW3snds0aFlauTMUPP/Rf+k1rwb1xSopTdVwoe+w7SkHPniawXjjhi77JWwE6Asts+2OAMS5tlgEdre2KaCMW5drW3s5b8dsagx9COpRlTpw4IW+99ZbMbNYsT1+cDZIB8gTIHdZ2Ntp8tJq1ppAB8hjIVfb1gooV5dTTT3u+mWXmucdaR6hM/lAP71nX2QLyJ0unfinIx3YddXFs4d14KruunzjLdpB6ILPd6e7deSr7m7VrxeEaILC4xd9OgQHMjWHwHwTLj8Ea6HehF4+di8+Xu7S5j/yLz/Os7cvJv/i8i2AsPpfWODxBICMjQ+bPny9JSUlSrVo1SUpKkoVvvikOKzDcWGvgtpex1rP7Eu3HEAVyHfk9Xs9FREjTatXk9ttvl1WrVrn3si3Es9drKa4fg5tFb3eCYQ/a9n6Gp/sHa+HUx0XtQSB1QKqiA/f9OxgDcrh4rRs8EjTBoO9FT7RV4U7gKavuWaC3tR0FfIReXF4LXGQ79ynrvG1AD1/uVyLBUJrj8ASI7Oxs+eKLL2To0KFSo0YN6dKli7z11lv5Q177YdA+fvy4vPjii9KsWTNp06aN/Pvf/9aZ2JyE4q3TZnHlaTZ0AOQikBc83TvYzlnW/7C3tKZbQDKt7Z/RlkDrXNsFIhSF+X2FNUEVDMEuxRYMpe2NJoCqLofDIWvWrJFRo0bJhRdeKImJifLiiy96Do3gx0E7NzdXli5dKr169ZKaNWvKQw89JNu3b9cHi/MdVaok0r59sZ6R4/ffJadSJRE8z4bGWduukUzz7h+KcA6pqSI33eTT8/kFPXv40N3xQPQ9hLkxDN4xgsEVH+Lw/BukqfWjvxHkYEnfRotLAFVdP//8szz99NPStGlTad68uYwdO1a2bdvm28kBEKy7du2Sxx57TOLj46V79+7y2WefSe60ab69dTqLawwhH5/RypUr5dprr5UvLrhAcoOtwvIHkyaJeMj9IOiYUtGWUGuLjr5aoF0gZzshzo1hKIgRDK4UEodnBUg8egp+Du3k1CkUP/4ATMX3798vU6ZMkbZt20rdunXloYcektTU1OJF0wyQqiAjI0NmzZol7du3l4SEBHn3/vsls1cv92+d1hu+T4O2mz6sXr1aunXrJk2aNJF3331Xcr7/vnQunHrJ/eAsOSDfgjyHdhh02844lpUbjGCw40P0x0dA/marP2i9ae1w1gVDXeDHN/Jjx4
7JG2+8IZ07d5bY2FgZPny4LF++3D8J3gOsKli7dq0MGTJEatSoISNvvVX23n//+bfODh3k1QoV3Aby201Blc+ztme0YcMGufnmm6VBgwYyc+ZMycrKOn/T0qZmFPGY+8FduQfkZU/Hg2FRZQgLjGCw40P0x0fQU2/nsQPWIJNiH/ACucDoQ/TSwkJOnz17Vj788EPp06ePVKtWTfr37y8ff/yxZGRkBKbPAVYVHDlyRCZNmiQJCQnSvn17+ewf/xBHTIzH0BVOwZDtZvDLiIiQG2vWlJdeesnz8yhtC6c+zBic5S502BIzYyjfGMFgx4c4PF9aA+6P6Bgvd4MokPeD9QMqJF6P4DnkdK5SsrZhQ6levbp069ZN3nnnHfnjjz8C19cgk5OTIwsXLpTvateWHNvzcDUr9SYYcpWS7N69C79ZaVo49fDCcxhkLnpNIQdkKdrv41N3QsGEuy5X+CoYlG5bukhMTJR169b5fsLNN8OiRXm7DuB2dMiGTzkfcmE68JJV/yAwEVgEXGsd39q0KZ/edRc1atTwWKKj7b66PpKWBo0b54tH9Hd0fJBZ1v4v6CBS9pDTdnIqVuTYhg1ceMUVRb9/acCHZ7QH7QxTD+092Q14AR3lFdDxfPbt883DtgSeykHDzTMBOAL0RzsIOdARaUcB/+fuGkV5JoZSj1JqvYgkFtau1ITEKBE+xOEB7YXnDOr1KzAesA+zjmrVOHXqFPv27eOPP/7gxIkT/PHHH3nlxIkTKKW8Cg57iY2NpUaNGjSZP59a4DWiYWEhpytWqsSFS5ZAWRUMHsJy26kFpAJt0LHd70PH6MmL416UsNzx8UUP3x1sPOR+iAdW+nK+CUVh8ED5EAw+xOHJRHvfXY6O9nc38AA60BcA0dFccdttTChksMjMzMwnKOyCw1m3e/fufPtP/fILfQqJBnoAHZ2yH3AI+B64CR23/FIo+4lOfIjwegHgfBW6EB2VtS46wmtVKJvPqAS5H4iO1ucbDC6UD8HgQ/THm9DqpZ3oQWQY8Jz9GiI+RX+MiorKi9zpMy6qLnfYQ05XJH/I6UudjcpyohMfIry64oyY6rBXlrVn1L49TJnie/Y5JzEx+rzEQrUKhnJI+RAMPkZ/3OTpQKCn3D5EL/Ul5LTX6KWlHdszyrFKrlUy0f/I64EaQHPgBFqv3hmXPBFl8Rk5o6SOHo0jPd17khWl9ExhyhT/RVc1lDn8kaindDBmjP5BFIdAT7ltiU5y0AOdfdDLIX/I6Rx0opgVwI32PpblRCe2ZzQePYOaCLxnbY9HR2Dsjp7xXYGeGc61X6MsP6PkZM58/jmfV6qEVK5c8H89Olo/v6QkWLnSCAWDV8qHVZKToiR8d+Kccgfyh2SzLhkHPONyeCx6ofknYAR6ZtMYeB5IcjYq69YlHixwikJWhQocWb+e+m3a+LFj4cPLL7/M999/zwevvhr+FlWGkOCrVVLIfRKKU8pkdNVQhJwubZTgGTmUki0XXyxxcXHy5JNPysmTJ0P9afxKbm6uNGvWTL777rtQd8UQxhDEnM+li+RkPZVOStJv2eEy5Q5nVVe4UIJnpKKjufy99/jxxx85ePAgLVq04LXXXiO7CNnbwpmlS5dSrVo1rr766lB3xVAW8EV6hFvxWwa3cIv+WIx4PblRUaEPzRBM/BTTaMOGDdK1a1dp0aKFpKSkFC+gYBjRvXt3eeedd0LdDUOYg/F8LqU410EyMvI5LRVAKbIrVWJCzZo8tG0bVatWDV4fQ00RnpE3CxwRYdmyZTz66KPExsYyZcoUOnTo4Fsf0tK0Hn/TJm1KW726XiAfNizoevxt27bRqVMn9u7dS5S1QG8wuCMoawxAHPAlsN36G+umTRu0P9ZP6HXTW23HZgG7gY1WaePLff02YwhXfIzX41i7VkaMGCF9+vSR3NzcUPc6uPgxplFOTo68+eabUq9ePRk4cKDs2rXLc+MwTAt7//33y5NPPhm0+xlKLwRjxq
CUmgwcF5GJSqknLMHwuEubFlr+yHalVD20ufmlIvKHUmoWsEhE5hflvmV6xmDHh3g9WVlZdOnSheuvv55nnnG1ZyoH+DGm0dmzZ/nXv/7Fyy+/zLBhw3jqqaeItfs9+Gmm4k9OnTpFQkICmzZtokGDBgG9l6H0E6wZwzagrrVdF9jmwzk/As3l/Iyhf1HvW+ZnDEXk999/l4YNG8r8+fND3ZUywaFDh+Tuu++W+Ph4mTp1qpw7dy5s8zW88sorMmDAgIDew1B2IBhht4E/bNvKvu+hfQd0qKIKcl4wbEOrmKYClX25rxEMBVm3bp3UqlVLfvzxx/OVAcwZXR7YsmWL9OzZU/rUqyfZkZFec2V8CHIJOiPgpejQ6XnCIUAhunNzc6VFixbyzTffBOT6hrKH3wQDOt7cFjelj6sgAE54uU5dSwj8yaVOoZ1U3wX+4eX8u4F1wLpGjRoF+vmVSt5//31JSEiQE198EXZ68NLM4WuukRzwmCvjAEgldP5wB8gidK7lwwH2MVm6dKm0adOm1FtUGYKHr4KhpGsM24DOIvKbUqou8F8RudhNu2rAf4F/iof1BKVUZ2C0iPQq7L7lZo2hGHxy4430+OorKusv13NDEzPHN3zIA7EGuBlIs50WDywEOkLAvNJvuukm+vXrx/Dhwz03CiPrKUPo8XWNoaQObguBIdb2EHTeG9eORAKfALNdhYIlTFBKKeAW9EzEUFxmzOCW774jyuHwLhRAv+ump+vF1BkzgtO/0ogPeSAS0RFuF6LjW6Wgp8B5gQ+deSD8yI4dO0hNTeW2225z3yA1Ffr21UJt7FiYM0dH8J0zB8aNg0aN9PHUVL/2y1A2KKlgmAh0U0ptB7pa+yilEpVSb1pt/oqOATdUKbXRKs5gNXOUUpuBzeg8K+NL2J/yS2oqjB7N9PR0EtED01CXJm8CzdB5C7qj8zrkCQczA3OPD3kgIoA70WHbK1t/ZwJVnA0CkAdi+vTp3HXXXe4zBs6YAZ076wQ+mZkF+5+RoetSUnQ782JgcMUXfVO4FbP47IZCckavAIkH2QJyzjreqbzFWioOvXrlX6OhYK7pL0HiQFJBckHWgtQB2WBrc/Tqq+XQoUN+WQ84ffq0xMXFyd69ewseDFPrKUN4gI9rDOUjH0NZJy0NliwBEfpaVevQenAni4AB6Ax1AE8D9dGJiZqKwOLF2icg3PXOwdaZ+5ArYyN6SuxU3LYHrkJbbTinxmt+/ZWhrVuTnp7ORRddRLNmzWjatGne36ZNm9KoUSMiIiIKvd/s2bPp3LkzjRo1yn/AmjXekZ7OcuAsOiHVY+iovFno2cw6YC86bHtnOD9rbN/eJO4xAOUlUU9Zx0f9tbjZ3gI0haLlQw4FqakwYYIWgJBfPfLxx1qP3qOHDrTXvn2JbpWbm8uaNWv49NNPqfXFF4xE53zwlCCoPVqHuhEtCDYA3wJ/c14wOpqejz1G2qOPcurUKXbu3MmOHTvYuXMnqampfPDBB+zcuZO0tDQaNWrkVmg0adKEqKgoRIRXX32V119/vWDHJ0yAjAzGAG+h1Vq/oAf/tkBL4M/Ag+iXhHxkZOjzFywo0bMzlA2MYCgL+KAH7w4MBO5FZzh7Fm0nnJeZIpzzIRfmcZyRof+mpOj8x8WwtDp79ixffvklCxcuZNGiRdSpU4fevXvT9T//IeqWWyAzk/Hkz5XxHudzZYwD+gOH0RZJTwI3OBuK5KWFrVatGm3btqVt27YF+pCZmcnu3bvzhMb27dtZunQpO3fuZN++fdSuXZu4uDgOHz7M999/z5EjR/KER9WMjLxZ4+W2ayqr7ATaoYUC6HWRfJSmWaMh4BjB4G9CYR7oQz7kruhBrR9wCj1AVAXsQRR+WrWKFdOmkZCQQOPGjUlISAh9cL6iJFeyW1pBocLht99+Y9GiRSxcuJCVK1fSoUMHevfuzdNPP02TJk3ON+zRA1JSGCfCOA/XGmmVAhQhLWxUVBSXXnopl1
56aYFjOTk57N+/n9tvv52ePXty7Ngx1qxZw86dO9m5cyePAY+fO4czhN7f0Oa0GejZQs9C7074zxoNwcOXhYhwK2G5+Byi4Gr79u2THX/6U6ELpK5lG0gMyHFb3ea2bSU5OVl69Oghl112mcTExEhcXJxceeWV0rdvX3n44YfllVdekYULF8qmTZsCn+xm7VqRmBjJBBkO0sjyLG5tOZM5+/0VyMWWU1lnkD0ePI4dDods3rxZnn/+ebnqqqukRo0aMnDgQHn//ffl+PHjhfajSAu6AfB83rlzp9SqVUvOnj1b4HOd7du3wL1zQL4FeQ4ky+VYfcsgoUB/Bw/2S18N4Qlm8TmIBEHV4eTkyZOsWLGCr776iq+++oqjR4/yUr16NK5UiYrZ2R714DnADvTi8360G/kDQF6IuOhorrjtNl6zvS2KCEePHmXPnj155ZdffmHp0qV5+1FRUflmGK6lWrVqxfqcQJ7OPAdoCKxE571ejLaB3ow2ve2LNsW9Gb2ofiuw2tKZZ3/wAd999x0LFy7k008/xeFw0Lt3b8aPH0+nTp2IjIwsvB/t2+vvrLhpYf20oDt9+nSGDRtGTExMvnqlFDFZWQXaR6DXFN4DZgCjfLnJiRMl76ih1GMEQ0kJoKoDdPTU1atX8+WXX/LVV1+xZcsWOnbsSNeuXZk7dy6tW7emwtGj2pEpO9ujHvxBtEXKTrQKaRjwnGvfLD24E6UU8fHxxMfH097Ngq5dcOzdu5c9e/bw66+/8sUXX+QJjsjISLcCwylIqnuy+rFZWlWBfCqcXkATdJjeY2hh51xMHYd2iPlFhItSUrg8Pp4azZvTu3dvUlJSaNmyJdqfsog4v6sQRVc9c+YM7777Lh49/r1YT+Wgv3efsEeTNZRbjGAoCYWYB65Gv8GuR7+9dQZeAep6MQ8UEbZs2ZI3I/j222+5+OKL6dq1K+PHj+eaa64pmIyldm2f9OCbPH2OIujB859WuOA4duxYvhmHq+CoVKmSW8HRfsWKvEBarhwGfkULhBlAa9uxKmgrq5+AphERrBs5kurj/eQ3mZysv7MJE/RCrVLnZ4OghYGIfpZjxhR/puBmnWpzejo3dehAQkKC+3NatYIFC0jLzORrtPCMRpvMzrUKwDnOW6RloWeUlbGec3S0DlluKPeYDG4loW9fSEnhJxGakd888HN07JwzwI1oCTwS7W28FPSgkpQECxZw4MABli9fnjcrqFKlCt26daNr16785S9/oWbNmoX3JTVVe7EWRdXhJCZG57cOsg27iHD8+PF8gsNZ7v72W252s6ieDfRAD/4zgbvQVkATbW2uAf4Py/N78GCYPdv/nfdjHog8vJjkZihFZKVKRNx0k3uTXCum05HMTPqjY9s7gMZoFdL/Wc0S0D4MdnZb9YGK6WQIH4KSjyFUJSwWnw8fLrjIDPKL5fX6oZuFvfXW4qlzPysiQjo2ayZxcXEyYMAAmTlzpuzcubP4fSpLXq9uPI5zQW4F6WFbTB0FkuzS7gqQ+c79Xr1C/Ul8w/ndKeX9+1LK83dmeb8Xa5HceL+XCzCLzwHGxanMF/PAbyCfjTlK8WHPntSfOpUKFUoatoqQ68H9iovOXNCzg8PoxedKVv3l6HjtTs6i9el5z7k06Mz9tU41Zow2bijOrDE6Wp9vMFDyIHrlFxensteA02iP175otVK+5minshdsdZVycmh44oR/hIKT5GStFkpK0qoB1yBr0dG6PilJtwtHoQBaZ25bS0lGZ3j6DK07d5KE9t5egNaXP4uOanoJkBURwfaoKLKzs4PV66JjrVO5DubH0Z+tClod9L7ree6CHzqtp1yslgrFz9ZThtKPmTEUFzf6b0/mgTvQevGXgWtdTwqEeWBiog5tEAg9eLAYOlSHuUDrxGeihW0dW5OZwCC0UBgJ3IGOUfSBdbyCUoxct47UCy+ke/fu9OnThx49epTMhNYdJXFqtExyXbkPiETPkDYCN6EX2fPNON
2FsShLs0ZD6PBF3xRuJSzWGAYN8qivvcvSfYvlbNUYZIYn3a5xKPKMn3TmBw8elNdff1169OghVatWlRtuuEGmT58u+/fvL1n/SurU6GGd6gw6I9w2W90dII+7+5xRUe5Ttaam6s8fFaX74q5vffsGLO2oITwhGDmfQ1XCQjBMmiQSFSWHQeaCnEZ7mi5FexR/ik75eBHIC54Gr+hokcmTQ/1JwpcAeByfOnVKPvroI7njjjvyvLqfeeYZ2bhxY9FCYvtjsdj6H3I95we0F7e97gWQXsX5H0pL08cHD9YL8YMH632T97tcEhTBAMQBXwLbrb+xHtrlomfEG4GFtvom6MyIO4APgUhf7hsWgsF620tD5zWoDlIVbRHzhvWjHafXTKWKSyn0bc9wngBaWmVnZ8uKFSvkwQcflCZNmkhCQoKMGjVKli9fLllZWV77NCgiQupY33lzkH9b9/4JpB1IDatcb9W57ZOHWec3IBe61L0Bcp2nz2tmnQYfCZZgmAw8YW0/AUzy0O6Mh/p5wEBr+3Ug2Zf7hoVgEDHmgcHCH2/nheBwOGTTpk3y3HPPSWJiosTFxcmgQYNk3rx5curUqfMNrVnMFpBM674/WwP5OpATILtBHOgZ5MsgLT3NYtyY5HqaMUzxNGOA0mOSawg5wRIM24C61nZdYJuHdgUEA9rZ8ihQ0drvCCzz5b5hIxjCJLhauSDIOvMDBw7IjBkzpHv37lK1alW58cYb5bXXXpP07t0LCChPvivZINPsg7zry4CHGYNzjeFXW91gPMPn1m8AABNESURBVKwxmBmDoQgESzD8YdtW9n2XdjnoxFGrgVusulrADlubhsAWX+4bNoJBpGw5lZUGQqAzP3nypMybN0+S+/WTDNv3mGwN+oC0Ra8zOY9VB4kAUejopnnfvV196GGNQdCOfAMtIfEdSDV0WtYCbc06laEI+E0woMOtbHFT+rgKAuCEh2vUt/5eBOxBRzQokmBABwRdB6xr1KhRoJ9f0QiCqsMQBkyaJA6XgdxbaOszINNBFnkayD1YJQnIMZA+aEOGhiBzPP1PmXUqQxHwVTAU6lklIl1F5Ao35VPgsFKqLoD1N83DNQ5af3cB/0U7Bx8DaiilnL4UDYCDXvrxhogkikhifLjZ4JcVpzKDdzZtQrlkynP6rhxA+67YqYLOmHcnth+GPVOeM/ihm2ivcUAK2pN7HzoybgGKGfzQYCiMkjq4LQSGoGOYDQE+dW2glIoF0kXknFKqFjrG2WQREaXUCnRGxA88nV9qKAtOZQbveMmU5ym0tQOdPvUgUNtZaXdqNGEsDGFISQXDRGCeUuoutIPqXwGUUonAvSIyArgUmKmUcqBDcEwUka3W+Y8DHyilxqNzqL9Vwv6Envh4kxqxrGLFb0oDj6Gtv0TrSFuh3/b/jk6GlC9Zpz1+U/v2rOrblyvnzCFaq0x9w4SxMASQEgXpEZFjInK9iDS3VE7Hrfp1llBARP4nIi1FpLX19y3b+btEpIOINBORASJyrmQfx2AIIFb8JoVWGzVAD/qjgZeA3sAfwG1AdfRC2k50mPW8qE8REXDsmJ5ZAmvXruWWpUv54+mn9WBfWBIhpc4LBaOSNAQIk4/BYPAVK+cBLusMRcYKDpj5l7/Qf/16/u+NN+jTp48OiDdhAlmffkqFChWoaA/+568kQIZyja/5GEwQPYPBV2yZ8ijJC5UlWCotWcLHlSoReeiQrrfWqbpddhlzbriBBsePm3UqQ0gwgsFgKAolWSx2IQKIyM7Ol1shIyOD1D17iJ80CSq7Bm83GIKDycdgMBSFIuY8OIdOMNQYqAq0AZa4NrLlVti8eTOXXHIJlY1QMIQQIxgMhqKSnHxeOBSyWJyD9txcCZwExqNN9/a4NrRyK/zwww+0bdvW/302GIqAEQwGQ3FwdWq0ZZuzUwUYBySgf2y90CGF17s2FIHFi/l11SquvPLKgHXbYPAFIxgMhuLidGrctw+6dAEfUr
QeBn7FJRObE6VotGKFEQyGkGMEg8FQUuLjteWQw+G1WTY6FekQdE7qAmRkUPv332nVqpX/+2gwFAEjGAwGf+AlXAbo0BiD0Xmcp3lpVzc6mipVqvixYwZD0THmqgaDP7DCZbhD0JZJh4HFQCUvl6lk/BQMYYCZMRgM/sAKl+GOZOBn4DN0bCVPZEVE4Ljc7eqDwRBUjGAwGPzB0KFuq/cCM9HJzusAF1hljpu24nBQccSIAHXQYPAdIxgMBn/gIbdCY7QqKRM4YyuDXE4XpVhaoQKXd+4chM4aDN4xgsFg8BdjxhRM0uQjUrkys+rWpbqXtQqDIVgYwWAw+IsihsvIIyaG1IEDqdSxY2D6ZTAUESMYDAZ/UoRwGfbcCvPi4oxjmyFsKJFgUErFKaW+VEptt/7GumnzF6XURlvJVErdYh2bpZTabTvWpiT9MRjCAl9ygFeuDJddBp06weLF3PjeeyTt2JGXwMdgCCUlStSjlJoMHBeRiUqpJ4BYEXncS/s4YAfQQETSlVKzgEUiMr8o9zWJegylBtcc4Lm5OoTGjh16xmBL+uOIitJvaj166PWK9u1D1WtDGcXXRD0lVSX1Ad61tt8FbimkfX9giYiUPJi9wVAacOYAnz1bZ19buRK2boVz5wpkgquQmanrUlKgc2eYMSM0fTaUe0oqGC4Ukd+s7d+BCwtpPxCdM93O80qpTUqpqUopE4TeUDaZMUPnXEhPLzz7m8j5HA1GOBhCQKGCQSn1lVJqi5vSx95OtE7K43+8Uqou0BJYZqseg44n1h6IA7ypoe5WSq1TSq07YvSwhtJEaup5oWBjGpAIVAaGujvPlsDHYAgmhQoGEekqIle4KZ8Ch60B3znwp3m51F+BT0QkL8O5iPwmmnPAO0AHL/14Q0QSRSQx3sSTMZQmJkzQiXhcqAf8HRju7VwrgY/BEExKqkpaiI4ijPX3Uy9tb8NFjWQTKgq9PrGlhP0xGMKLtDRYssSt+qgv+p++prfzrQQ+xlrJEExKKhgmAt2UUtuBrtY+SqlEpdSbzkZKqQTOZzi0M0cptRnYDNRCZz40GMoOs2aV/BpK+ec6BoOPlCjstogcA653U78OGGHb3wPUd9OuS0nubzCEPZs2FbA+KjIZGdrc1WAIEsbz2WAIJIUk8PGZEyf8cx2DwQeMYDAYAom/guLFFggqYDAEDCMYDIZA4iWBTw46HHeuVTKtugJER0PLloHqocFQACMYDIZA4iGBD2hLi2i0xcZ71rZb6wsRr9cxGPyNEQwGQwDJrFaNnxo1ItfNsXFoj1B7GefSxgHk3HCDDq1hMAQJIxgMhgCxevVqrrzySt6tVw9VzAQ+2RERDPjhB7799ls/985g8EyJzFUNhjJNWpr2H9i0SVsXVa+u1wyGDfP6Bp+ens7TTz/NnDlzeOWVVxgwYADq9dfdhsXwSkwMladMYWi9etx6663ceuutPP/888QUlgiomP02GPIQkVJX2rVrJwZDwFi7ViQpSSQqShet5dclOlrXJSXpdi6sXLlSmjVrJgMHDpS0tLT8B197TSQmRkSp/Nd0LUrpdq+9lnfq0aNH5fbbb5fmzZvLd9995/d+G8oHwDrxYYwN+SBfnGIEgyFgFHPwPn36tIwcOVLq1asnn3zyiefrp6aK9O2rB+noaPeDd9++up0bPvnkE6lbt6489NBDcvbs2RL321C+MILBYCgqzsHV28DqWmJi5OcHHpCEhAQZMmSIHDt2zLd7paWJTJ4sn9eqJceuuUZk8GCRyZN1fSEcPXpUbrvtNmnevLmsWrWq2P02wqH8YQSDwVAU1q6VzOhoGQ7SCOQCkNYgi62B9CeQdiA1rHK9VScgZ5WS7156yfd7HT4sMmmSyKBBsqJqVTnWs6fe90Eo2FmwYIHcWLOmnIqI8Njv70G6gsSC1ALpD3LILhw8zEwMZRMjGAyGopCUJGdAxoLsBskF+cwaaHeDnLD+OkByQF4GaWkNsA6ltPqnMA
KwBpDZs6ec8tLvxSDzQE6CnAUZBnKjXa3kS78NZQZfBUOJcj6HCpPz2eBX0tKgcWO3we5aAWOBfra6HGAm8CiQZ2MUFaVzOXuy+nFmcMvIcBuCOw+ltKfzlCmQnOzXfgP8AFwHnP7/9s49xorqjuOfb4FFXgryEBV5VUilNoF2JbUpgVpMgDTatWox2kJCagppk8bUBEuaJvYvNLWxqUk1Nil9GIlgkUaoggVpDIvQYKFAePpaSpEaoSEqj/DrH+dcmHt37t5Z9t6598Lvk0yYe+bM7ofZufObOefM72T1di4p8prz2XGanzIprY8C+4DPJ8oGA1cAPwR+kqzcVWrsWk3r2Q3vAptKyz2lt5OCBwbHSUmNfQa4nzD71OcS5ceBE4RpOackdyiXGjtO63nq449ZAIwBBgGTgbWxymngbmAsIGAjZJvWsxveADuAR4HHs3g7lzU9CgyS7pG0S9I5SWUfTyTNkrRX0gFJixPl4yRtieXLJbX0xMdxLoqS1NjngO8ALYQAUMoA4PvAdymZyzYtNXac1vMsF2aqOkHIiXQv8E6s9lVCvqSRyX0rTevZDe8DwGzgSWBa6c/xlN5OCT19YvgXYYbCTeUqSOoFPEU4LycB90maFDcvBX5pZjcCHwELeujjON0nkRrbCCfhUWAl0KfMLucI/QuHk4WlqbET03oOIORBGkv40n0DGAf8g3Ah/xEhOPRK7m8VpvXM6P0uYXrFnxICRyc8pbdTQo8Cg5ntMbO9FapNBQ6Y2SEzOw08D9wZ53m+DVgR6y0jTIHrOPmSSI29ENgD/IWQ7bTAOmA7IT32/4CHgCHATYUKaamxu2i776ofoIiu+gAyeB8mfMl+QHjK6YSn9HZSyKOP4Xrg/cTnjlg2FDhuZmdLyh0nX2JK63cJo43eIjTpDIzLnwh9C/cBVwGfBQ4CfyV0RAPpqbHLTOvZVT9AJ7rqA8jg/SxwiPC0MjCxnCfN27nsqZhET9J6Spo+I0vM7KXqK5X1eBB4EGD06NF5/VrncmDECJg9mzGrVtHV8O17ym2QYM6czkM+U6b1rNR/kUq5PoCM3j8rt6Gct3PZU/GJwcxmmtnNKUvWoHCY0O9WYFQs+xAYLKl3SXk5j2fMrNXMWof7iexUm0ceCc0qF0O/fmH/Ukqm9czaf9GJrvoAauHtXPbk0ZS0FZgQRyC1AHOB1fEtvA2EkXoQnqxzewJxnCJuuSW8VFYppXUp/fuH/VpTBuWVTOtZrh8A4BRhak8Iw1c/JQSSin0AtfB2nCyvR5dbgDZC38Apwo3QK7H8OmBNot4cQl/bQUITVKF8PPAmYTTdC0DfLL/XU2I4NaOaWUqPHj2f+uKdcJ23vmADEssf488bQ6fJ3OxtCPtnyaHk2VWdDOApMRznItm2Lbw/sGZNaIf/5JML2/r1C5fYOXNCM0ylO+677oJVqyq/8ZyGBG1tsHJl/t7OJUnWlBgeGBynHMeOhaGiO3eGDuAhQ0Kzzvz52Ttst26FGTO6N3Nbgf794fXXu38Rr4a3c0nigcFxGoVkrqSsFPoAKiXSc5xukDUw+JzPjlNrChf3amdXdZwa4Un0HCcPFi4MzUJtbWGkUukQ0379QnlbW6jnQcGpI/7E4Dh50doaOpK9D8BpcDwwOE7eDB8ODz9cbwvHKYs3JTmO4zhFeGBwHMdxivDA4DiO4xTRlO8xSDpGyDZcYBjw3zrpdJdmcgX3rTXN5NtMruC+aYwxs4ojHJoyMJQiaVuWlzYagWZyBfetNc3k20yu4L49wZuSHMdxnCI8MDiO4zhFXCqB4Zl6C3SDZnIF9601zeTbTK7gvhfNJdHH4DiO41SPS+WJwXEcx6kSTRkYJF0taZ2k/fHf1ElxJT0maZekPZJ+JUkN7Dpa0qvRdbeksfmanvfI5BvrXimpQ1Lmee2rTRZfSZMlbY7nwg5J387ZcZakvZIOSFqcsr2vpOVx+5Z6/e0TPpV8H4rn6A5Jr0kaUw/PhE
+Xvol635Jkkuo28ieLq6R74/HdJem5vB2Bnk3tWa8FeAxYHNcXA0tT6nwFeAPoFZfNwIxGdI3bNgK3x/WBQP9GPbaJuk8CzwG/bvBzYSIwIa5fBxwBBufk14swpe14oAX4JzCppM4i4DdxfS6wvI7HM4vv1wrnJ2Eq64b2jfUGAZuAdqC1UV2BCcB2YEj8PKIerk35xADcCSyL68uAb6bUMeAKwh+gL9CHMC913lR0lTQJ6G1m6wDM7KSZXcSUX1Uhy7FF0peAa4BXc/IqR0VfM9tnZvvj+r+BD4C80phOBQ6Y2SEzOw08T3BOkvw/rAC+Xo+n20hFXzPbkDg/24FROTsmyXJ8AX4OLAU+zVOuhCyu3wOeMrOPAMzsg5wdgSZtSgKuMbMjcf0/hAtUEWa2GdhAuDs8ArxiZnvyUzxPRVfCHe1xSS9K2i7pcUm98lMsoqKvpM8AvwB+nKdYGbIc3/NImkq4WThYa7HI9cD7ic8dsSy1jpmdBU4AQ3Ox60wW3yQLgLU1Neqair6SvgjcYGYv5ymWQpZjOxGYKOkNSe2SZuVml6Bh025LWg+MTNm0JPnBzExSp6FVkm4EbuLC3cw6SdPM7O+N5kr4O0wDpgDvAcuB+cBvq2saqILvImCNmXXkcWNbBd/Cz7kW+AMwz8zOVdfy8kPSA0ArML3eLuWINzFPEL5PzUBvQnPSDMK1a5OkL5jZ8bwlGhIzm1lum6Sjkq41syPxy572uNUGtJvZybjPWuBWoOqBoQquHcBbZnYo7rMK+DI1CgxV8L0VmCZpEaE/pEXSSTMr2/FXZ18kXQm8DCwxs/ZaeJbhMHBD4vOoWJZWp0NSb+Aq4MN89DqRxRdJMwmBebqZncrJLY1KvoOAm4GN8SZmJLBa0h1mlvfE8VmObQewxczOAG9L2kcIFFvzUQw0a1PSamBeXJ8HvJRS5z1guqTekvoQ7mrq0ZSUxXUrMFhSod37NmB3Dm5pVPQ1s/vNbLSZjSU0J/2+VkEhAxV9JbUAfyZ4rsjRDcLfdoKkcdFjLsE5SfL/cDfwN4s9j3Wgoq+kKcDTwB31agNP0KWvmZ0ws2FmNjaer+0E77yDQkXXyCrC0wKShhGalg7lKQk07aikocBrwH5gPXB1LG8FnrULIwCeJgSD3cATjeoaP98O7AB2Ar8DWhrZN1F/PvUdlZTlXHgAOAO8lVgm5+g4B9hH6NdYEsseJVygIAySeAE4ALwJjK/X8czou54wkKNwLFc3sm9J3Y3UaVRSxmMrQtPX7ngtmFsPT3/z2XEcxymiWZuSHMdxnBrhgcFxHMcpwgOD4ziOU4QHBsdxHKcIDwyO4zhOER4YHMdxnCI8MDiO4zhFeGBwHMdxivg/SzMyhWLi3m0AAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- kernel matrix of cyclic pattern kernel of size 999 built in 18.78946042060852 seconds ---\n", + "(array([[11., 5., 5., ..., 6., 7., 3.],\n", + " [ 5., 16., 6., ..., 5., 5., 3.],\n", + " [ 5., 6., 8., ..., 4., 5., 3.],\n", + " ...,\n", + " [ 6., 5., 4., ..., 17., 7., 4.],\n", + " [ 7., 5., 5., ..., 7., 15., 4.],\n", + " [ 3., 3., 3., ..., 4., 4., 11.]]), 18.78946042060852)\n" + ] + } + ], + "source": [ + "%load_ext 
line_profiler\n", + "\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "# datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf'\n", + "# datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt'\n", + "# dataset, y = loadDataset(datafile, datafile_y)\n", + "G1 = dataset[1]\n", + "G2 = dataset[2]\n", + "G3 = dataset[3]\n", + "G4 = dataset[4]\n", + "G5 = dataset[5]\n", + "data = [G1, G2, G3, G4, G5]\n", + "nx.draw_networkx(G1)\n", + "plt.show()\n", + "nx.draw_networkx(G2)\n", + "plt.show()\n", + "\n", + "kernel = cyclicpatternkernel(dataset[1:1000], cycle_bound = 1000)\n", + "print(kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 185/185 [00:00<00:00, 2064.69it/s]\n", + "calculate kernels: 100%|██████████| 185/185 [00:00<00:00, 11170.00it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 185 built in 0.10836505889892578 seconds ---\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|██████████| 1000/1000 [00:24<00:00, 36.41it/s]\n", + " Mean performance on train set: 0.018072\n", + "With standard deviation: 0.000000\n", + "\n", + " Mean performance on test set: 0.000000\n", + "With standard deviation: 0.000000\n", + "\n", + "\n", + " accur_test std_test accur_train std_train k_time\n", + "------------ ---------- ------------- ----------- --------\n", + " 0 0 0.0180723 0 0.108365\n" + ] + } + ], + "source": [ + "# acyclic dataset (node labeled, edge labeled, undirected, linear + non-linear, regression)\n", + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n", + "\n", + "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " normalize = False , model_type = 'classification')\n", + "\n", + "# kernel_para['k_func'] = 'minmax'\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = True)\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "# # kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)\n", + "\n", + "# kernel_para['depth'] = 10\n", + "# %lprun -f untildpathkernel \\\n", + "# 
kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/run_marginalizedkernel_acyclic.ipynb b/notebooks/run_marginalizedkernel_acyclic.ipynb index 93f1626..9890952 100644 --- a/notebooks/run_marginalizedkernel_acyclic.ipynb +++ b/notebooks/run_marginalizedkernel_acyclic.ipynb @@ -364,6 +364,155 @@ }, { "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a regression problem ---\n", + "\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- marginalized kernel matrix of size 185 built in 1133.0229969024658 seconds ---\n", + "[[ 0.0287062 0.0124634 0.00444444 ..., 0.00606061 0.00606061\n", + " 0.00606061]\n", + " [ 0.0124634 0.01108958 0.00333333 ..., 0.00454545 0.00454545\n", + " 0.00454545]\n", + " [ 0.00444444 0.00333333 0.0287062 ..., 0.00819912 0.00819912\n", + " 0.00975875]\n", + " ..., \n", + " [ 0.00606061 0.00454545 0.00819912 ..., 0.02846735 0.02836907\n", + " 0.02896354]\n", + " [ 0.00606061 0.00454545 0.00819912 ..., 0.02836907 0.02831424\n", + " 0.0288712 ]\n", + " [ 0.00606061 0.00454545 0.00975875 ..., 0.02896354 0.0288712\n", + " 0.02987915]]\n", + "\n", + " Saving kernel matrix to file...\n", + "\n", + " Mean performance on train set: 12.186285\n", + "With standard deviation: 7.038988\n", + "\n", + " Mean performance on test set: 18.024312\n", + "With standard deviation: 
6.292466\n", + "\n", + "\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 18.0243 6.29247 12.1863 7.03899 1133.02\n" + ] + } + ], + "source": [ + "%load_ext line_profiler\n", + "\n", + "import numpy as np\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.marginalizedKernel import marginalizedkernel, _marginalizedkernel_do\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', itr = 20, p_quit = 0.1)\n", + "\n", + "# kernel_train_test(datafile, kernel_file_path, marginalizedkernel, kernel_para, \\\n", + "# hyper_name = 'p_quit', hyper_range = np.linspace(0.1, 0.9, 9), normalize = False)\n", + "\n", + "%lprun -f _marginalizedkernel_do \\\n", + " kernel_train_test(datafile, kernel_file_path, marginalizedkernel, kernel_para, \\\n", + " normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Timer unit: 1e-06 s\n", + "\n", + "Total time: 828.879 s\n", + "File: ../pygraph/kernels/marginalizedKernel.py\n", + "Function: _marginalizedkernel_do at line 67\n", + "\n", + "Line # Hits Time Per Hit % Time Line Contents\n", + "==============================================================\n", + " 67 def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):\n", + " 68 \"\"\"Calculate marginalized graph kernel between 2 graphs.\n", + " 69 \n", + " 70 Parameters\n", + " 71 ----------\n", + " 72 G1, G2 : NetworkX graphs\n", + " 73 2 graphs between which the kernel is calculated.\n", + " 74 node_label : string\n", + " 75 node attribute used as label.\n", + " 76 edge_label : string\n", + " 77 edge attribute used as label.\n", + " 78 p_quit : 
integer\n", + " 79 the termination probability in the random walks generating step.\n", + " 80 itr : integer\n", + " 81 time of iterations to calculate R_inf.\n", + " 82 \n", + " 83 Return\n", + " 84 ------\n", + " 85 kernel : float\n", + " 86 Marginalized Kernel between 2 graphs.\n", + " 87 \"\"\"\n", + " 88 # init parameters\n", + " 89 17205 12886.0 0.7 0.0 kernel = 0\n", + " 90 17205 52542.0 3.1 0.0 num_nodes_G1 = nx.number_of_nodes(G1)\n", + " 91 17205 28240.0 1.6 0.0 num_nodes_G2 = nx.number_of_nodes(G2)\n", + " 92 17205 15595.0 0.9 0.0 p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)\n", + " 93 17205 11587.0 0.7 0.0 p_init_G2 = 1 / num_nodes_G2\n", + " 94 \n", + " 95 17205 11663.0 0.7 0.0 q = p_quit * p_quit\n", + " 96 17205 10728.0 0.6 0.0 r1 = q\n", + " 97 \n", + " 98 # initial R_inf\n", + " 99 17205 38412.0 2.2 0.0 R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes\n", + " 100 \n", + " 101 # calculate R_inf with a simple interative method\n", + " 102 344100 329235.0 1.0 0.0 for i in range(1, itr):\n", + " 103 326895 900354.0 2.8 0.1 R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])\n", + " 104 326895 2287346.0 7.0 0.3 R_inf_new.fill(r1)\n", + " 105 \n", + " 106 # calculate R_inf for each pair of nodes\n", + " 107 2653464 3667117.0 1.4 0.4 for node1 in G1.nodes(data = True):\n", + " 108 2326569 7522840.0 3.2 0.9 neighbor_n1 = G1[node1[0]]\n", + " 109 2326569 3492118.0 1.5 0.4 p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)\n", + " 110 24024379 27775021.0 1.2 3.4 for node2 in G2.nodes(data = True):\n", + " 111 21697810 69471941.0 3.2 8.4 neighbor_n2 = G2[node2[0]]\n", + " 112 21697810 32446626.0 1.5 3.9 p_trans_n2 = (1 - p_quit) / len(neighbor_n2) \n", + " 113 \n", + " 114 
59095092 52545370.0 0.9 6.3 for neighbor1 in neighbor_n1:\n", + " 115 104193150 92513935.0 0.9 11.2 for neighbor2 in neighbor_n2:\n", + " 116 \n", + " 117 t = p_trans_n1 * p_trans_n2 * \\\n", + " 118 66795868 285324518.0 4.3 34.4 deltakernel(G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label]) * \\\n", + " 119 66795868 137934393.0 2.1 16.6 deltakernel(neighbor_n1[neighbor1][edge_label] == neighbor_n2[neighbor2][edge_label])\n", + " 120 66795868 106834143.0 1.6 12.9 R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8)\n", + " 121 \n", + " 122 326895 1123677.0 3.4 0.1 R_inf[:] = R_inf_new\n", + " 123 \n", + " 124 # add elements of R_inf up and calculate kernel\n", + " 125 139656 330283.0 2.4 0.0 for node1 in G1.nodes(data = True):\n", + " 126 1264441 1435263.0 1.1 0.2 for node2 in G2.nodes(data = True): \n", + " 127 1141990 1377134.0 1.2 0.2 s = p_init_G1 * p_init_G2 * deltakernel(node1[1][node_label] == node2[1][node_label])\n", + " 128 1141990 1375456.0 1.2 0.2 kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)\n", + " 129 \n", + " 130 17205 10801.0 0.6 0.0 return kernel" + ] + }, + { + "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": false diff --git a/notebooks/run_pathkernel_acyclic.ipynb b/notebooks/run_pathkernel_acyclic.ipynb index 33480f3..12df241 100644 --- a/notebooks/run_pathkernel_acyclic.ipynb +++ b/notebooks/run_pathkernel_acyclic.ipynb @@ -2,23 +2,24 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "The line_profiler extension is already loaded. 
To reload it, use:\n", + " %reload_ext line_profiler\n", "\n", " --- This is a regression problem ---\n", "\n", "\n", - "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- mean average path kernel matrix of size 185 built in 45.52756929397583 seconds ---\n", + " --- mean average path kernel matrix of size 185 built in 29.430902242660522 seconds ---\n", "[[ 0.55555556 0.22222222 0. ..., 0. 0. 0. ]\n", " [ 0.22222222 0.27777778 0. ..., 0. 0. 0. ]\n", " [ 0. 0. 0.55555556 ..., 0.03030303 0.03030303\n", @@ -33,16 +34,16 @@ "\n", " Saving kernel matrix to file...\n", "\n", - " Mean performance on train set: 3.761907\n", - "With standard deviation: 0.702594\n", + " Mean performance on train set: 3.619948\n", + "With standard deviation: 0.512351\n", "\n", - " Mean performance on test set: 14.001515\n", - "With standard deviation: 6.936023\n", + " Mean performance on test set: 18.418852\n", + "With standard deviation: 10.781119\n", "\n", "\n", - " RMSE_test std_test RMSE_train std_train k_time\n", + " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 14.0015 6.93602 3.76191 0.702594 45.5276\n" + " 18.4189 10.7811 3.61995 0.512351 29.4309\n" ] } ], @@ -59,10 +60,10 @@ "\n", "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type')\n", "\n", - "kernel_train_test(datafile, kernel_file_path, pathkernel, kernel_para, normalize = True)\n", + "kernel_train_test(datafile, kernel_file_path, pathkernel, kernel_para, normalize = False)\n", "\n", "# %lprun -f _pathkernel_do \\\n", - "# kernel_train_test(datafile, kernel_file_path, pathkernel, kernel_para, normalize = True)" + "# kernel_train_test(datafile, kernel_file_path, pathkernel, kernel_para, normalize = False)" ] }, { @@ -81,7 +82,7 @@ "# without y normalization\n", " RMSE_test std_test RMSE_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 
18.4189 10.7811 3.61995 0.512351 37.0017" + " 18.4189 10.7811 3.61995 0.512351 29.4309" ] }, { diff --git a/notebooks/run_spkernel_acyclic.ipynb b/notebooks/run_spkernel_acyclic.ipynb index 8466693..3c7e9d4 100644 --- a/notebooks/run_spkernel_acyclic.ipynb +++ b/notebooks/run_spkernel_acyclic.ipynb @@ -2,44 +2,42 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The line_profiler extension is already loaded. To reload it, use:\n", - " %reload_ext line_profiler\n", "\n", " --- This is a regression problem ---\n", "\n", "\n", - "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", - "--- shortest path kernel matrix of size 185 built in 14.576777696609497 seconds ---\n", - "[[ 3. 1. 3. ..., 1. 1. 1.]\n", - " [ 1. 6. 1. ..., 0. 0. 3.]\n", - " [ 3. 1. 3. ..., 1. 1. 1.]\n", - " ..., \n", - " [ 1. 0. 1. ..., 55. 21. 7.]\n", - " [ 1. 0. 1. ..., 21. 55. 7.]\n", - " [ 1. 3. 1. ..., 7. 7. 55.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + "--- shortest path kernel matrix of size 185 built in 13.3865065574646 seconds ---\n", + "[[ 3. 1. 3. ... 1. 1. 1.]\n", + " [ 1. 6. 1. ... 0. 0. 3.]\n", + " [ 3. 1. 3. ... 1. 1. 1.]\n", + " ...\n", + " [ 1. 0. 1. ... 55. 21. 7.]\n", + " [ 1. 0. 1. ... 21. 55. 7.]\n", + " [ 1. 3. 1. ... 7. 7. 
55.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 757.54it/s]\n", " Mean performance on train set: 28.360361\n", "With standard deviation: 1.357183\n", "\n", " Mean performance on test set: 35.191954\n", "With standard deviation: 4.495767\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 771.22it/s]\n", "\n", "\n", - " RMSE_test std_test RMSE_train std_train k_time\n", + " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 35.192 4.49577 28.3604 1.35718 14.5768\n" + " 35.192 4.49577 28.3604 1.35718 13.3865\n" ] } ], diff --git a/notebooks/run_treeletkernel_acyclic.ipynb b/notebooks/run_treeletkernel_acyclic.ipynb index 41ee8d3..59daf4d 100644 --- a/notebooks/run_treeletkernel_acyclic.ipynb +++ b/notebooks/run_treeletkernel_acyclic.ipynb @@ -2,15 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The line_profiler extension is already loaded. 
To reload it, use:\n", - " %reload_ext line_profiler\n", "\n", " --- This is a regression problem ---\n", "\n", @@ -19,68 +17,34 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- treelet kernel matrix of size 185 built in 0.48417091369628906 seconds ---\n", - "[[ 4.00000000e+00 2.60653066e+00 1.00000000e+00 ..., 1.26641655e-14\n", - " 1.26641655e-14 1.26641655e-14]\n", - " [ 2.60653066e+00 6.00000000e+00 1.00000000e+00 ..., 1.26641655e-14\n", - " 1.26641655e-14 1.26641655e-14]\n", - " [ 1.00000000e+00 1.00000000e+00 4.00000000e+00 ..., 3.00000000e+00\n", - " 3.00000000e+00 3.00000000e+00]\n", - " ..., \n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 1.80000000e+01\n", - " 1.30548713e+01 8.19020657e+00]\n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 1.30548713e+01\n", - " 2.20000000e+01 9.71901120e+00]\n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 8.19020657e+00\n", - " 9.71901120e+00 1.60000000e+01]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- treelet kernel matrix of size 185 built in 0.47543811798095703 seconds ---\n", + "[[4.00000000e+00 2.60653066e+00 1.00000000e+00 ... 1.26641655e-14\n", + " 1.26641655e-14 1.26641655e-14]\n", + " [2.60653066e+00 6.00000000e+00 1.00000000e+00 ... 1.26641655e-14\n", + " 1.26641655e-14 1.26641655e-14]\n", + " [1.00000000e+00 1.00000000e+00 4.00000000e+00 ... 3.00000000e+00\n", + " 3.00000000e+00 3.00000000e+00]\n", + " ...\n", + " [1.26641655e-14 1.26641655e-14 3.00000000e+00 ... 1.80000000e+01\n", + " 1.30548713e+01 8.19020657e+00]\n", + " [1.26641655e-14 1.26641655e-14 3.00000000e+00 ... 1.30548713e+01\n", + " 2.20000000e+01 9.71901120e+00]\n", + " [1.26641655e-14 1.26641655e-14 3.00000000e+00 ... 
8.19020657e+00\n", + " 9.71901120e+00 1.60000000e+01]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 796.45it/s]\n", " Mean performance on train set: 2.688029\n", "With standard deviation: 1.541623\n", "\n", " Mean performance on test set: 10.099738\n", "With standard deviation: 5.035844\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 745.11it/s]\n", "\n", "\n", " rmse_test std_test rmse_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 10.0997 5.03584 2.68803 1.54162 0.484171\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - "\n", - " Loading dataset from file...\n", - "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- treelet kernel matrix of size 185 built in 0.5003015995025635 seconds ---\n", - "[[ 4.00000000e+00 2.60653066e+00 1.00000000e+00 ..., 1.26641655e-14\n", - " 1.26641655e-14 1.26641655e-14]\n", - " [ 2.60653066e+00 6.00000000e+00 1.00000000e+00 ..., 1.26641655e-14\n", - " 1.26641655e-14 1.26641655e-14]\n", - " [ 1.00000000e+00 1.00000000e+00 4.00000000e+00 ..., 3.00000000e+00\n", - " 3.00000000e+00 3.00000000e+00]\n", - " ..., \n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 1.80000000e+01\n", - " 1.30548713e+01 8.19020657e+00]\n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 1.30548713e+01\n", - " 2.20000000e+01 9.71901120e+00]\n", - " [ 1.26641655e-14 1.26641655e-14 3.00000000e+00 ..., 8.19020657e+00\n", - " 9.71901120e+00 1.60000000e+01]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on train set: 2.908869\n", - "With standard deviation: 1.267900\n", - "\n", - " Mean performance on test set: 8.307902\n", - "With standard deviation: 3.378376\n", - "\n", - "\n", - " rmse_test std_test rmse_train std_train k_time\n", - "----------- ---------- ------------ ----------- --------\n", - " 8.3079 3.37838 2.90887 
1.2679 0.500302\n" + " 10.0997 5.03584 2.68803 1.54162 0.475438\n" ] } ], @@ -99,8 +63,6 @@ "\n", "kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n", "\n", - "kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = True)\n", - "\n", "# %lprun -f treeletkernel \\\n", "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)" ] @@ -121,14 +83,58 @@ "# without y normalization\n", " RMSE_test std_test RMSE_train std_train k_time\n", "----------- ---------- ------------ ----------- --------\n", - " 10.0997 5.03584 2.68803 1.54162 0.484171" + " 10.0997 5.03584 2.68803 1.54162 0.484171\n", + "\n", + " \n", + "\n", + "# G0 -> WL subtree h = 0\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 13.9223 2.88611 13.373 0.653301 0.186731\n", + "\n", + "# G0 U G1 U G6 U G8 U G13 -> WL subtree h = 1\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 8.97706 2.90771 6.7343 1.17505 0.223171\n", + " \n", + "# all patterns \\ { G3 U G4 U G5 U G10 } -> WL subtree h = 2 \n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 7.31274 1.96289 3.73909 0.406267 0.294902\n", + "\n", + "# all patterns \\ { G4 U G5 } -> WL subtree h = 3\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 8.39977 2.78309 3.8606 1.58686 0.348912\n", + "\n", + "# all patterns \\ { G5 } \n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 9.47647 4.22113 3.18029 1.5669 0.423638\n", + " \n", + " \n", + " \n", + "# G0, -> WL subtree h = 0\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ 
----------- --------\n", + " 13.9223 2.88611 13.373 0.653301 0.186731 \n", + " \n", + "# G0 U G1 U G2 U G6 U G8 U G13 -> WL subtree h = 1\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 8.62431 2.54327 5.63422 0.255002 0.290797\n", + " \n", + "# all patterns \\ { G5 U G10 } -> WL subtree h = 2\n", + " rmse_test std_test rmse_train std_train k_time\n", + "----------- ---------- ------------ ----------- --------\n", + " 10.1294 3.50275 3.69664 1.55116 0.418498" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { - "scrolled": false + "scrolled": true }, "outputs": [ { diff --git a/notebooks/run_treepatternkernel.ipynb b/notebooks/run_treepatternkernel.ipynb new file mode 100644 index 0000000..a5d9ae6 --- /dev/null +++ b/notebooks/run_treepatternkernel.ipynb @@ -0,0 +1,3191 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a regression problem ---\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-10 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.36956548690796 seconds ---\n", + "[[ 13. 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13. ... 29. 29.\n", + " 29. ]\n", + " ...\n", + " [ 20. 20. 29. ... 365.00000001 365.00000001\n", + " 365.00000001]\n", + " [ 20. 20. 29. ... 365.00000001 365.00000001\n", + " 365.00000001]\n", + " [ 20. 20. 29. ... 
365.00000001 365.00000001\n", + " 365.00000002]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 91%|█████████▏| 914/1000 [00:01<00:00, 751.28it/s]\n", + " Mean performance on train set: 5.993535\n", + "With standard deviation: 0.356922\n", + "\n", + " Mean performance on test set: 7.464904\n", + "With standard deviation: 1.718585\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 795.88it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-09 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.47467517852783 seconds ---\n", + "[[ 13. 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.00000001 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13. ... 29. 29.\n", + " 29. ]\n", + " ...\n", + " [ 20. 20. 29. ... 365.00000015 365.00000015\n", + " 365.00000015]\n", + " [ 20. 20. 29. ... 365.00000015 365.00000015\n", + " 365.00000015]\n", + " [ 20. 20. 29. ... 365.00000015 365.00000015\n", + " 365.0000002 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 654.33it/s]\n", + " Mean performance on train set: 5.963041\n", + "With standard deviation: 0.374107\n", + "\n", + " Mean performance on test set: 7.375105\n", + "With standard deviation: 1.769252\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 711.24it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-08 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.32968211174011 seconds ---\n", + "[[ 13.00000004 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.00000008 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13.00000004 ... 
29.00000004 29.00000004\n", + " 29.00000004]\n", + " ...\n", + " [ 20. 20. 29.00000004 ... 365.00000148 365.00000148\n", + " 365.00000148]\n", + " [ 20. 20. 29.00000004 ... 365.00000148 365.00000148\n", + " 365.00000148]\n", + " [ 20. 20. 29.00000004 ... 365.00000148 365.00000148\n", + " 365.00000202]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 967/1000 [00:01<00:00, 809.48it/s]\n", + " Mean performance on train set: 5.965110\n", + "With standard deviation: 0.378249\n", + "\n", + " Mean performance on test set: 7.350689\n", + "With standard deviation: 1.780556\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 786.78it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-07 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.74151062965393 seconds ---\n", + "[[ 13.0000004 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.0000008 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13.0000004 ... 29.0000004 29.0000004\n", + " 29.0000004]\n", + " ...\n", + " [ 20. 20. 29.0000004 ... 365.0000148 365.0000148\n", + " 365.0000148]\n", + " [ 20. 20. 29.0000004 ... 365.0000148 365.0000148\n", + " 365.0000148]\n", + " [ 20. 20. 29.0000004 ... 
365.0000148 365.0000148\n", + " 365.0000202]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 735.76it/s]\n", + " Mean performance on train set: 5.966982\n", + "With standard deviation: 0.382093\n", + "\n", + " Mean performance on test set: 7.350999\n", + "With standard deviation: 1.781470\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 804.24it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-06 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.52131748199463 seconds ---\n", + "[[ 13.000004 14. 4. ... 20. 20. 20. ]\n", + " [ 14. 20.000008 4. ... 20. 20. 20. ]\n", + " [ 4. 4. 13.000004 ... 29.000004 29.000004 29.000004]\n", + " ...\n", + " [ 20. 20. 29.000004 ... 365.000148 365.000148 365.000148]\n", + " [ 20. 20. 29.000004 ... 365.000148 365.000148 365.000148]\n", + " [ 20. 20. 29.000004 ... 365.000148 365.000148 365.000202]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 970/1000 [00:01<00:00, 759.32it/s]\n", + " Mean performance on train set: 5.969758\n", + "With standard deviation: 0.386318\n", + "\n", + " Mean performance on test set: 7.351225\n", + "With standard deviation: 1.780522\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 783.42it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1e-05 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.67099857330322 seconds ---\n", + "[[ 13.00004 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.00008 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13.00004 ... 
29.00004 29.00004\n", + " 29.00004 ]\n", + " ...\n", + " [ 20. 20. 29.00004 ... 365.00148001 365.00148001\n", + " 365.00148 ]\n", + " [ 20. 20. 29.00004 ... 365.00148001 365.00148001\n", + " 365.00148 ]\n", + " [ 20. 20. 29.00004 ... 365.00148 365.00148\n", + " 365.00202 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 801.70it/s]\n", + " Mean performance on train set: 5.970557\n", + "With standard deviation: 0.390719\n", + "\n", + " Mean performance on test set: 7.348129\n", + "With standard deviation: 1.780293\n", + "\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 0.0001 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 36.80127692222595 seconds ---\n", + "[[ 13.0004 14. 4. ... 20. 20.\n", + " 20. ]\n", + " [ 14. 20.0008 4. ... 20. 20.\n", + " 20. ]\n", + " [ 4. 4. 13.0004 ... 29.0004 29.0004\n", + " 29.0004 ]\n", + " ...\n", + " [ 20. 20. 29.0004 ... 365.01480072 365.01480072\n", + " 365.0148 ]\n", + " [ 20. 20. 29.0004 ... 365.01480072 365.01480072\n", + " 365.0148 ]\n", + " [ 20. 20. 29.0004 ... 
365.0148 365.0148\n", + " 365.0202 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 889.41it/s]\n", + " Mean performance on train set: 5.942495\n", + "With standard deviation: 0.331983\n", + "\n", + " Mean performance on test set: 7.349836\n", + "With standard deviation: 1.781100\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 883.76it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 0.001 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 35.8681423664093 seconds ---\n", + "[[ 13.004 14. 4. ... 20. 20. 20. ]\n", + " [ 14. 20.008 4. ... 20. 20. 20. ]\n", + " [ 4. 4. 13.004 ... 29.004 29.004 29.004 ]\n", + " ...\n", + " [ 20. 20. 29.004 ... 365.148072 365.148072 365.148 ]\n", + " [ 20. 20. 29.004 ... 365.148072 365.148072 365.148 ]\n", + " [ 20. 20. 29.004 ... 365.148 365.148 365.202 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 99%|█████████▉| 988/1000 [00:01<00:00, 886.54it/s]\n", + " Mean performance on train set: 5.933395\n", + "With standard deviation: 0.324965\n", + "\n", + " Mean performance on test set: 7.357745\n", + "With standard deviation: 1.780977\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 888.00it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 0.01 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 36.001843214035034 seconds ---\n", + "[[ 13.04 14. 4. ... 20. 20. 20. ]\n", + " [ 14. 20.08 4. ... 20. 20. 20. ]\n", + " [ 4. 4. 13.04 ... 29.04 29.04 29.04 ]\n", + " ...\n", + " [ 20. 20. 
29.04 ... 366.4872 366.4872 366.48 ]\n", + " [ 20. 20. 29.04 ... 366.4872 366.4872 366.48 ]\n", + " [ 20. 20. 29.04 ... 366.48 366.48 367.02 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 978/1000 [00:01<00:00, 863.94it/s]\n", + " Mean performance on train set: 5.940695\n", + "With standard deviation: 0.347431\n", + "\n", + " Mean performance on test set: 7.374269\n", + "With standard deviation: 1.791145\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 878.96it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 0.1 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 36.37146854400635 seconds ---\n", + "[[ 13.4 14. 4. ... 20. 20. 20. ]\n", + " [ 14. 20.8 4. ... 20. 20. 20. ]\n", + " [ 4. 4. 13.4 ... 29.4 29.4 29.4 ]\n", + " ...\n", + " [ 20. 20. 29.4 ... 380.52 380.52 379.8 ]\n", + " [ 20. 20. 29.4 ... 380.52 380.52 379.8 ]\n", + " [ 20. 20. 29.4 ... 379.8 379.8 385.2 ]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 993/1000 [00:01<00:00, 860.40it/s]\n", + " Mean performance on train set: 6.427114\n", + "With standard deviation: 1.293674\n", + "\n", + " Mean performance on test set: 7.329299\n", + "With standard deviation: 1.913634\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 883.01it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.83972358703613 seconds ---\n", + "[[ 17. 14. 4. ... 20. 20. 20.]\n", + " [ 14. 28. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 17. ... 33. 33. 33.]\n", + " ...\n", + " [ 20. 20. 33. ... 585. 585. 
513.]\n", + " [ 20. 20. 33. ... 585. 585. 513.]\n", + " [ 20. 20. 33. ... 513. 513. 567.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 979/1000 [00:01<00:00, 616.77it/s]\n", + " Mean performance on train set: 6.624254\n", + "With standard deviation: 1.224196\n", + "\n", + " Mean performance on test set: 7.271336\n", + "With standard deviation: 2.207735\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 630.82it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 10.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.50818395614624 seconds ---\n", + "[[5.300e+01 1.400e+01 4.000e+00 ... 2.000e+01 2.000e+01 2.000e+01]\n", + " [1.400e+01 1.000e+02 4.000e+00 ... 2.000e+01 2.000e+01 2.000e+01]\n", + " [4.000e+00 4.000e+00 5.300e+01 ... 6.900e+01 6.900e+01 6.900e+01]\n", + " ...\n", + " [2.000e+01 2.000e+01 6.900e+01 ... 9.045e+03 9.045e+03 1.845e+03]\n", + " [2.000e+01 2.000e+01 6.900e+01 ... 9.045e+03 9.045e+03 1.845e+03]\n", + " [2.000e+01 2.000e+01 6.900e+01 ... 1.845e+03 1.845e+03 2.385e+03]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 8%|▊ | 77/1000 [00:00<00:01, 764.71it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.069543502626658e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1303298666315776e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.70249458866672e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.63992169055093e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.438093960487116e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0002169262936346e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.5920339281975188e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5874866272574162e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.0599424240471626e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.468773818521402e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.958334441043603e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 27%|██▋ | 267/1000 [00:00<00:01, 654.82it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/sklearn/linear_model/ridge.py:154: UserWarning: Singular matrix in solving dual problem. Using least-squares solution instead.\n", + " warnings.warn(\"Singular matrix in solving dual problem. Using \"\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.055618175730539e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.7159074038024934e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.895455126720251e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.400306511546424e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.206478316049589e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8083631222444177e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.49051280863482e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.339852738992424e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.277544863160196e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.458523723353626e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 48%|████▊ | 477/1000 [00:00<00:00, 685.70it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6380760737666547e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.3843421259537676e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.890544546973404e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.054758730954765e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.9172765626494813e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4455093698440067e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2914256710839066e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.65667341282596e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.852926745577629e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.285092924342139e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 69%|██████▉ | 694/1000 [00:01<00:00, 712.57it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.700250453064005e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6205193931367065e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.4925504318417794e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.0111387119813346e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.343123723749221e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0143662852277667e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.206690575125046e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.049999246995425e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.4232350203422674e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.350008400303505e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.251763015291957e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 90%|█████████ | 902/1000 [00:01<00:00, 666.85it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.924869742342744e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4010401637647583e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.5117924740400373e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.626753798403599e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.911227588173856e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0660043401009468e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3099139652029694e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.2680602391853274e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.396574210735164e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 97%|█████████▋| 969/1000 [00:01<00:00, 600.11it/s]\n", + " Mean performance on train set: 6.816974\n", + "With standard deviation: 1.501822\n", + "\n", + " Mean performance on test set: 7.497870\n", + "With standard deviation: 2.368148\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 648.87it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 100.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.09455919265747 seconds ---\n", + "[[4.13000e+02 1.40000e+01 4.00000e+00 ... 2.00000e+01 2.00000e+01\n", + " 2.00000e+01]\n", + " [1.40000e+01 8.20000e+02 4.00000e+00 ... 2.00000e+01 2.00000e+01\n", + " 2.00000e+01]\n", + " [4.00000e+00 4.00000e+00 4.13000e+02 ... 4.29000e+02 4.29000e+02\n", + " 4.29000e+02]\n", + " ...\n", + " [2.00000e+01 2.00000e+01 4.29000e+02 ... 7.35165e+05 7.35165e+05\n", + " 1.51650e+04]\n", + " [2.00000e+01 2.00000e+01 4.29000e+02 ... 7.35165e+05 7.35165e+05\n", + " 1.51650e+04]\n", + " [2.00000e+01 2.00000e+01 4.29000e+02 ... 1.51650e+04 1.51650e+04\n", + " 2.05650e+04]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 10%|▉ | 97/1000 [00:00<00:02, 436.93it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.249229588791739e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6092761314568358e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6033357497241564e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0333531111165975e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.073851980749357e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.497880470461594e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0349745182117167e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 17%|█▋ | 169/1000 [00:00<00:02, 372.64it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.227908145504113e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1894008132724887e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9131474526752795e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0421770253846576e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.864916618602575e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.663676730244888e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 31%|███▏ | 314/1000 [00:00<00:01, 429.29it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.6989200751598342e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9988176582222278e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.173259131422707e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.069621878854856e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.087601566853754e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.1827598831940232e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.428244298929586e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1120965359644164e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8329496119281176e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.5128147762765525e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.173155329882729e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 50%|█████ | 501/1000 [00:01<00:00, 516.57it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.874621241781873e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9047959204426696e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.03000007539236e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.8198639503150797e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.679200342495213e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6211471280327221e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.74280825574767e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.685372827008377e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9723233156997277e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.142362330339379e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.0042133764798303e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.987833375253946e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 71%|███████ | 706/1000 [00:01<00:00, 471.97it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.63949546549065e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.76170805410039e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.3721058293845662e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.178277242767302e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.4883373934010664e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.526360275338589e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.827383891217367e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 81%|████████ | 807/1000 [00:01<00:00, 457.30it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.052622499085628e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.70793549450487e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8190124240850417e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.349104192126423e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.132340452050677e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 91%|█████████ | 909/1000 [00:01<00:00, 451.90it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.78695142234395e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.8765535280551442e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.9917255115528226e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.781650263544808e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.581768670551366e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0493867289518776e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.9787087068181396e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.756012232435961e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.800283208793992e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.468606690086715e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.119459703249427e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 96%|█████████▌| 956/1000 [00:02<00:00, 445.96it/s]\n", + " Mean performance on train set: 6.687664\n", + "With standard deviation: 1.348089\n", + "\n", + " Mean performance on test set: 7.428867\n", + "With standard deviation: 2.647892\n", + "calculate performance: 100%|██████████| 1000/1000 [00:02<00:00, 467.65it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.517051219940186 seconds ---\n", + "[[4.0130000e+03 1.4000000e+01 4.0000000e+00 ... 2.0000000e+01\n", + " 2.0000000e+01 2.0000000e+01]\n", + " [1.4000000e+01 8.0200000e+03 4.0000000e+00 ... 2.0000000e+01\n", + " 2.0000000e+01 2.0000000e+01]\n", + " [4.0000000e+00 4.0000000e+00 4.0130000e+03 ... 4.0290000e+03\n", + " 4.0290000e+03 4.0290000e+03]\n", + " ...\n", + " [2.0000000e+01 2.0000000e+01 4.0290000e+03 ... 7.2148365e+07\n", + " 7.2148365e+07 1.4836500e+05]\n", + " [2.0000000e+01 2.0000000e+01 4.0290000e+03 ... 7.2148365e+07\n", + " 7.2148365e+07 1.4836500e+05]\n", + " [2.0000000e+01 2.0000000e+01 4.0290000e+03 ... 1.4836500e+05\n", + " 1.4836500e+05 2.0236500e+05]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 10%|█ | 102/1000 [00:00<00:02, 330.46it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.374017095746491e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.687507275679712e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.7645414168071277e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8090152927008474e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.474194561968185e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.130856876335615e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 19%|█▉ | 188/1000 [00:00<00:02, 348.40it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.7794644716713837e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.3130943734340723e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.7505566440337117e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3279670378456666e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.318466984022222e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.418445519765442e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 28%|██▊ | 285/1000 [00:00<00:01, 380.47it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.1356199672921913e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6464620684950592e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.179430869121561e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.473887201835687e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.570987725305032e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.854640029504099e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 38%|███▊ | 385/1000 [00:00<00:01, 389.24it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3781828327775562e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.955507306233033e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.109947315270106e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.9378280303294975e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.866942525478256e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 49%|████▉ | 494/1000 [00:01<00:01, 398.83it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.636014964778956e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.6552163232757833e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.312233993243073e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.2794244316598437e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.406028628818668e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 60%|██████ | 604/1000 [00:01<00:00, 407.30it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.184686103929999e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.1281587780183657e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.367013528660628e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.155116554595105e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.443029464120917e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.4923079446940085e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.750703061909557e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 71%|███████ | 707/1000 [00:01<00:00, 402.63it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4970615125032324e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.39494402062226e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.818999641865095e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.051275910233908e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.688197813410084e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 81%|████████ | 806/1000 [00:01<00:00, 404.11it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.43659662072146e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.4284104102664825e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.900304923444742e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.1112688931900636e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.941184656304436e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.746476431972804e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 91%|█████████ | 906/1000 [00:02<00:00, 407.57it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.327409345420052e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0589341144557062e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.29154188313992e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.225020130252359e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.327720201864263e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 95%|█████████▌| 951/1000 [00:02<00:00, 373.87it/s]\n", + " Mean performance on train set: 6.819058\n", + "With standard deviation: 1.410085\n", + "\n", + " Mean performance on test set: 7.249143\n", + "With standard deviation: 2.655536\n", + "calculate performance: 100%|██████████| 1000/1000 [00:02<00:00, 414.03it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.193651783291256e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.19984276961351e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0689487149937185e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.8956869823870564e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.790887064559792e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 10000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.01269268989563 seconds ---\n", + "[[4.00130000e+04 1.40000000e+01 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00200000e+04 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00130000e+04 ... 4.00290000e+04\n", + " 4.00290000e+04 4.00290000e+04]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00290000e+04 ... 7.20148036e+09\n", + " 7.20148036e+09 1.48036500e+06]\n", + " [2.00000000e+01 2.00000000e+01 4.00290000e+04 ... 7.20148036e+09\n", + " 7.20148036e+09 1.48036500e+06]\n", + " [2.00000000e+01 2.00000000e+01 4.00290000e+04 ... 1.48036500e+06\n", + " 1.48036500e+06 2.02036500e+06]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 11%|█▏ | 114/1000 [00:00<00:02, 303.84it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6978549111114387e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1828172674052679e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.547017114313022e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.990786470945978e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.8300974250606965e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.727582936838558e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 21%|██ | 208/1000 [00:00<00:02, 342.60it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.569354559683504e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.885865920757663e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.006883677471783e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2491482969368813e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.596699463334369e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.74024827114128e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0002891123411383e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 31%|███ | 311/1000 [00:00<00:01, 371.07it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.478112292477647e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.3626069490970097e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.751107853461428e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.007160521891646e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.592912372477283e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 41%|████▏ | 414/1000 [00:01<00:01, 394.40it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.286966904954778e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.330065717080935e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0639972759819077e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3250852692883386e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.32507703774876e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.514317701906229e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 51%|█████▏ | 514/1000 [00:01<00:01, 411.17it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.5186106207732215e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2309907116861648e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.582391203608073e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.638503426307468e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0580872068659216e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 62%|██████▏ | 615/1000 [00:01<00:00, 424.96it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.633316922861427e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.253166879599146e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4688155478710103e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.3252803222906435e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.713726552669558e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.934042177466841e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.468592067289146e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 72%|███████▏ | 716/1000 [00:01<00:00, 433.84it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.66227030386163e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5342528466878185e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.620745355332433e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5797394734563764e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.109079490079661e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.548426603146641e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0484899604694826e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 82%|████████▏ | 816/1000 [00:01<00:00, 439.46it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.080827240476694e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.8279322282295696e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0410721959495632e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.343375151645726e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.351177509861134e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.370583478449445e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 92%|█████████▏| 916/1000 [00:02<00:00, 437.07it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.3677978521118296e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0227997187914302e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.557088603475233e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.657645350184021e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.990628121216557e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 96%|█████████▌| 962/1000 [00:02<00:00, 393.51it/s]\n", + " Mean performance on train set: 6.934306\n", + "With standard deviation: 1.384412\n", + "\n", + " Mean performance on test set: 7.081832\n", + "With standard deviation: 2.624800\n", + "calculate performance: 100%|██████████| 1000/1000 [00:02<00:00, 420.16it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.2964047050969517e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.089708488833387e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3234351109582e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.293818265362604e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.44555950404844e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 100000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 35.835275411605835 seconds ---\n", + "[[4.0001300e+05 1.4000000e+01 4.0000000e+00 ... 2.0000000e+01\n", + " 2.0000000e+01 2.0000000e+01]\n", + " [1.4000000e+01 8.0002000e+05 4.0000000e+00 ... 
2.0000000e+01\n", + " 2.0000000e+01 2.0000000e+01]\n", + " [4.0000000e+00 4.0000000e+00 4.0001300e+05 ... 4.0002900e+05\n", + " 4.0002900e+05 4.0002900e+05]\n", + " ...\n", + " [2.0000000e+01 2.0000000e+01 4.0002900e+05 ... 7.2001480e+11\n", + " 7.2001480e+11 1.4800365e+07]\n", + " [2.0000000e+01 2.0000000e+01 4.0002900e+05 ... 7.2001480e+11\n", + " 7.2001480e+11 1.4800365e+07]\n", + " [2.0000000e+01 2.0000000e+01 4.0002900e+05 ... 1.4800365e+07\n", + " 1.4800365e+07 2.0200365e+07]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 13%|█▎ | 126/1000 [00:00<00:03, 261.95it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0191112815027622e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.232307074954237e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.097885039345644e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.727869526025791e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 21%|██ | 208/1000 [00:00<00:03, 262.44it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2411064955279154e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.149899322677468e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.642478117522784e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 30%|███ | 305/1000 [00:01<00:02, 265.15it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.785067039039337e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.225934530879337e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.562790184640486e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 41%|████▏ | 414/1000 [00:01<00:01, 297.05it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.135758789917749e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.564182826681079e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.542436590136228e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 50%|█████ | 500/1000 [00:01<00:01, 319.27it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.195315905739342e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.379873880613873e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.630975349505465e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.518071272961898e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 61%|██████▏ | 613/1000 [00:02<00:01, 303.76it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.9460047081015216e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.839254463570563e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.318583704180543e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 73%|███████▎ | 728/1000 [00:02<00:00, 300.34it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.3984244112232524e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.3291156014339405e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.707621088224988e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0690013288979288e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 81%|████████▏ | 813/1000 [00:02<00:00, 321.97it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.315773170165585e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.138544398203078e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.386508127676171e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 89%|████████▉ | 891/1000 [00:02<00:00, 317.18it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.6842499515474312e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.528434456947986e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.856433851414765e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 96%|█████████▌| 957/1000 [00:03<00:00, 259.36it/s]\n", + " Mean performance on train set: 9.394995\n", + "With standard deviation: 1.047066\n", + "\n", + " Mean performance on test set: 8.237631\n", + "With standard deviation: 3.665300\n", + "calculate performance: 100%|██████████| 1000/1000 [00:03<00:00, 299.25it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.056791553686018e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.644703618966645e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.15242050721053e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.77732253074646 seconds ---\n", + "[[4.00001300e+06 1.40000000e+01 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00002000e+06 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00001300e+06 ... 4.00002900e+06\n", + " 4.00002900e+06 4.00002900e+06]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00002900e+06 ... 7.20001480e+13\n", + " 7.20001480e+13 1.48000365e+08]\n", + " [2.00000000e+01 2.00000000e+01 4.00002900e+06 ... 7.20001480e+13\n", + " 7.20001480e+13 1.48000365e+08]\n", + " [2.00000000e+01 2.00000000e+01 4.00002900e+06 ... 1.48000365e+08\n", + " 1.48000365e+08 2.02000365e+08]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 14%|█▎ | 135/1000 [00:00<00:03, 253.06it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.741195162637844e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.5965964498458038e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.983361443347492e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.7922291165206923e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.685526054240851e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 23%|██▎ | 230/1000 [00:00<00:02, 265.75it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6848871984797616e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.8811603375005575e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.7514073450053307e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5976314128410034e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.790988115471154e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.477178586927344e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.035215871851048e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 33%|███▎ | 332/1000 [00:01<00:02, 277.35it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.287317576627726e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.886325173924881e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.8372812242318245e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.523017625167697e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.309735186090854e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 43%|████▎ | 431/1000 [00:01<00:02, 258.66it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.2092887442597021e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.444285941342485e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6650957319224102e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.778343353100153e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.067496740668901e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.832366528737191e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 53%|█████▎ | 534/1000 [00:01<00:01, 252.20it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2368084459111367e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.625050966790768e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.786992563738048e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.5350081600164477e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.2823286559994256e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.880362578796432e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.1100444346816681e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 64%|██████▎ | 637/1000 [00:02<00:01, 260.77it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.1335408080135885e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4688002952482946e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.6678722631357644e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.288251804550535e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.895017980474164e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 74%|███████▍ | 741/1000 [00:02<00:00, 276.66it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.209157188853246e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.4519382674684447e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.5748326244710203e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.852436562697074e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.536506069365062e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.969036612017947e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 81%|████████▏ | 813/1000 [00:02<00:00, 297.62it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.309383987394578e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.565067260805818e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.516194197490843e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.041064934861363e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.499601220689098e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.051151158798192e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 92%|█████████▏| 915/1000 [00:03<00:00, 298.37it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.7472428919435347e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.703770718809819e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2818242346374262e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.102554831016506e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.599188283622467e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0647003359802031e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 98%|█████████▊| 977/1000 [00:03<00:00, 278.57it/s]\n", + " Mean performance on train set: 9.635245\n", + "With standard deviation: 0.687560\n", + "\n", + " Mean performance on test set: 8.529828\n", + "With standard deviation: 3.580591\n", + "calculate performance: 100%|██████████| 1000/1000 [00:03<00:00, 284.18it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 10000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.502409510736216e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0439081147173944e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.4602149061556115e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.9479003259500843e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.34621741763787e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.0245542507930726e-16 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.46095633506775 seconds ---\n", + "[[4.00000130e+07 1.40000000e+01 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00000200e+07 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00000130e+07 ... 4.00000290e+07\n", + " 4.00000290e+07 4.00000290e+07]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00000290e+07 ... 7.20000148e+15\n", + " 7.20000148e+15 1.48000036e+09]\n", + " [2.00000000e+01 2.00000000e+01 4.00000290e+07 ... 7.20000148e+15\n", + " 7.20000148e+15 1.48000036e+09]\n", + " [2.00000000e+01 2.00000000e+01 4.00000290e+07 ... 
1.48000036e+09\n", + " 1.48000036e+09 2.02000036e+09]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 12%|█▏ | 122/1000 [00:00<00:03, 231.16it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.200122265640733e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.627297027618617e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.893611596005168e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.1052758523976415e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.158312302718829e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.675650470006637e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 23%|██▎ | 227/1000 [00:00<00:03, 253.57it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.2230286797750079e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.0790289882373515e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.6170735381557016e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.90989130271085e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.4735492461286675e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.544548737558878e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 34%|███▍ | 344/1000 [00:01<00:02, 274.36it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.0569387542185164e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.8929851773550792e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.762490810329375e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.031994076021703e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.380795983197089e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 44%|████▎ | 437/1000 [00:01<00:02, 281.37it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.159845331824398e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.592173545119868e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.0031182975801337e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.315102488294594e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.427690586128571e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.36815306216013e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 52%|█████▎ | 525/1000 [00:01<00:01, 277.96it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.096509347533013e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.2037051514926243e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.641239932076709e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.911654384898199e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.195245830759744e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 61%|██████ | 606/1000 [00:02<00:01, 236.45it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.179739003035368e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.121167134816686e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 2.201750997217992e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.6143396276266097e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.876412427833992e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 8.158416120381036e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 73%|███████▎ | 727/1000 [00:02<00:01, 211.65it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.355737066205781e-20 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 6.778261189640049e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.992328403436096e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.166526583123927e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.566565138343654e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.272056103177315e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 82%|████████▏ | 822/1000 [00:03<00:00, 223.94it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.2435735437353417e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.471379508084743e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.686971341479104e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.827108937015577e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 93%|█████████▎| 931/1000 [00:03<00:00, 255.06it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.2357280081107672e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.559072238694825e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.45488311322705e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.6616151717441874e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.807701664283496e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculate performance: 98%|█████████▊| 983/1000 [00:04<00:00, 220.81it/s]\n", + " Mean performance on train set: 11.059074\n", + "With standard deviation: 1.323635\n", + "\n", + " Mean performance on test set: 10.964175\n", + "With standard deviation: 3.358726\n", + "calculate performance: 100%|██████████| 1000/1000 [00:04<00:00, 242.35it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 100000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. 
Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 9.749823872976888e-19 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 5.347208969568296e-18 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 1.9981807042259307e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 3.298232968104139e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 4.391381083354749e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n", + "/home/ljia/.local/lib/python3.5/site-packages/scipy/linalg/basic.py:40: RuntimeWarning: scipy.linalg.solve\n", + "Ill-conditioned matrix detected. Result is not guaranteed to be accurate.\n", + "Reciprocal condition number/precision: 7.328796748008544e-17 / 1.1102230246251565e-16\n", + " RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.91001057624817 seconds ---\n", + "[[4.00000013e+08 1.40000000e+01 4.00000000e+00 ... 
2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00000020e+08 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00000013e+08 ... 4.00000029e+08\n", + " 4.00000029e+08 4.00000029e+08]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00000029e+08 ... 7.20000015e+17\n", + " 7.20000015e+17 1.48000004e+10]\n", + " [2.00000000e+01 2.00000000e+01 4.00000029e+08 ... 7.20000015e+17\n", + " 7.20000015e+17 1.48000004e+10]\n", + " [2.00000000e+01 2.00000000e+01 4.00000029e+08 ... 1.48000004e+10\n", + " 1.48000004e+10 2.02000004e+10]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 992/1000 [00:03<00:00, 267.91it/s]\n", + " Mean performance on train set: 66.147687\n", + "With standard deviation: 11.979989\n", + "\n", + " Mean performance on test set: 63.221208\n", + "With standard deviation: 13.381090\n", + "calculate performance: 100%|██████████| 1000/1000 [00:03<00:00, 252.05it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 1000000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 38.43676400184631 seconds ---\n", + "[[4.00000001e+09 1.40000000e+01 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [1.40000000e+01 8.00000002e+09 4.00000000e+00 ... 2.00000000e+01\n", + " 2.00000000e+01 2.00000000e+01]\n", + " [4.00000000e+00 4.00000000e+00 4.00000001e+09 ... 4.00000003e+09\n", + " 4.00000003e+09 4.00000003e+09]\n", + " ...\n", + " [2.00000000e+01 2.00000000e+01 4.00000003e+09 ... 7.20000001e+19\n", + " 7.20000001e+19 1.48000000e+11]\n", + " [2.00000000e+01 2.00000000e+01 4.00000003e+09 ... 7.20000001e+19\n", + " 7.20000001e+19 1.48000000e+11]\n", + " [2.00000000e+01 2.00000000e+01 4.00000003e+09 ... 
1.48000000e+11\n", + " 1.48000000e+11 2.02000000e+11]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 976/1000 [00:04<00:00, 268.70it/s]\n", + " Mean performance on train set: 96.664827\n", + "With standard deviation: 1.871320\n", + "\n", + " Mean performance on test set: 100.134704\n", + "With standard deviation: 13.845906\n", + "calculate performance: 100%|██████████| 1000/1000 [00:04<00:00, 236.90it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when lmda = 10000000000.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 185 built in 37.32151246070862 seconds ---\n", + "[[4.00e+10 1.40e+01 4.00e+00 ... 2.00e+01 2.00e+01 2.00e+01]\n", + " [1.40e+01 8.00e+10 4.00e+00 ... 2.00e+01 2.00e+01 2.00e+01]\n", + " [4.00e+00 4.00e+00 4.00e+10 ... 4.00e+10 4.00e+10 4.00e+10]\n", + " ...\n", + " [2.00e+01 2.00e+01 4.00e+10 ... 7.20e+21 7.20e+21 1.48e+12]\n", + " [2.00e+01 2.00e+01 4.00e+10 ... 7.20e+21 7.20e+21 1.48e+12]\n", + " [2.00e+01 2.00e+01 4.00e+10 ... 
1.48e+12 1.48e+12 2.02e+12]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|█████████▉| 997/1000 [00:03<00:00, 268.42it/s]\n", + " Mean performance on train set: 98.175092\n", + "With standard deviation: 4.720613\n", + "\n", + " Mean performance on test set: 100.144883\n", + "With standard deviation: 13.958659\n", + "calculate performance: 100%|██████████| 1000/1000 [00:03<00:00, 260.49it/s]\n", + "\n", + "\n", + " lmda rmse_test std_test rmse_train std_train k_time\n", + "----------- ----------- ---------- ------------ ----------- --------\n", + " 1e-10 7.4649 1.71858 5.99354 0.356922 37.3696\n", + " 1e-09 7.37511 1.76925 5.96304 0.374107 37.4747\n", + " 1e-08 7.35069 1.78056 5.96511 0.378249 37.3297\n", + " 1e-07 7.351 1.78147 5.96698 0.382093 37.7415\n", + " 1e-06 7.35123 1.78052 5.96976 0.386318 37.5213\n", + " 1e-05 7.34813 1.78029 5.97056 0.390719 37.671\n", + " 0.0001 7.34984 1.7811 5.9425 0.331983 36.8013\n", + " 0.001 7.35775 1.78098 5.9334 0.324965 35.8681\n", + " 0.01 7.37427 1.79115 5.94069 0.347431 36.0018\n", + " 0.1 7.3293 1.91363 6.42711 1.29367 36.3715\n", + " 1 7.27134 2.20774 6.62425 1.2242 37.8397\n", + " 10 7.49787 2.36815 6.81697 1.50182 37.5082\n", + " 100 7.42887 2.64789 6.68766 1.34809 37.0946\n", + " 1000 7.24914 2.65554 6.81906 1.41008 37.5171\n", + " 10000 7.08183 2.6248 6.93431 1.38441 37.0127\n", + "100000 8.23763 3.6653 9.395 1.04707 35.8353\n", + " 1e+06 8.52983 3.58059 9.63525 0.68756 37.7773\n", + " 1e+07 10.9642 3.35873 11.0591 1.32363 37.461\n", + " 1e+08 63.2212 13.3811 66.1477 11.98 37.91\n", + " 1e+09 100.135 13.8459 96.6648 1.87132 38.4368\n", + " 1e+10 100.145 13.9587 98.1751 4.72061 37.3215\n" + ] + } + ], + "source": [ + "# tree pattern kernel, dataset acyclic.\n", + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.treePatternKernel import treepatternkernel\n", + 
"\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", + "\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, \\\n", + " kernel_type = 'untiln', h = 2)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, treepatternkernel, kernel_para, \\\n", + " hyper_name = 'lmda', hyper_range = np.logspace(-10, 10, num = 21, base = 10), \\\n", + " normalize = False, model_type = 'regression')\n", + "\n", + "# kernel_para['depth'] = 10\n", + "# %lprun -f untildpathkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# results\n", + "\n", + "# untiln kernel when h = 2\n", + " lmda rmse_test std_test rmse_train std_train k_time\n", + "----------- ----------- ---------- ------------ ----------- --------\n", + " 1e-10 7.46524 1.71862 5.99486 0.356634 38.1447\n", + " 1e-09 7.37326 1.77195 5.96155 0.374395 37.4921\n", + " 1e-08 7.35105 1.78349 5.96481 0.378047 37.9971\n", + " 1e-07 7.35213 1.77903 5.96728 0.382251 38.3182\n", + " 1e-06 7.3524 1.77992 5.9696 0.3863 39.6428\n", + " 1e-05 7.34958 1.78141 5.97114 0.39017 37.3711\n", + " 0.0001 7.3513 1.78136 5.94251 0.331843 37.3967\n", + " 0.001 7.35822 1.78119 5.9326 0.32534 36.7357\n", + " 0.01 7.37552 1.79037 5.94089 0.34763 36.8864\n", + " 0.1 7.32951 1.91346 6.42634 1.29405 36.8382\n", + " 1 7.27134 2.20774 6.62425 1.2242 37.2425\n", + " 10 7.49787 2.36815 6.81697 1.50182 37.8286\n", + " 100 7.42887 2.64789 6.68766 1.34809 36.3701\n", + " 1000 7.24914 2.65554 6.81906 1.41008 36.1695\n", + " 10000 7.08183 2.6248 6.93431 1.38441 37.5723\n", + "100000 8.021 3.43694 8.69813 0.909839 37.8158\n", + " 1e+06 8.49625 3.6332 9.59333 0.96626 38.4688\n", + " 1e+07 10.9067 3.17593 11.5642 2.07792 
36.9926\n", + " 1e+08 61.1524 10.4355 65.3527 13.9538 37.1321\n", + " 1e+09 99.943 13.6994 98.8848 5.27014 36.7443\n", + " 1e+10 100.083 13.8503 97.9168 3.22768 37.096\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The line_profiler extension is already loaded. To reload it, use:\n", + " %reload_ext line_profiler\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD8CAYAAABzTgP2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xt0FfW5//H3ExBIVBAEFFFAD3jBSulpVCgqIMiRuygKeAOhuIra/myPF0BaFbWg9Whrq1RrLSg3QTwUCqhcgyLxEFuEihVStVZFExVBISCX5/fHTHQn7Fxg7+zJTj6vtfbKXL4z+0NW2M+eme98x9wdERGRYhlRBxARkepFhUFEREpQYRARkRJUGEREpAQVBhERKUGFQURESlBhEBGRElQYRESkBBUGEREpoW7UAQ5H06ZNvU2bNlHHEBFJK6+//vqn7t6sonZpWRjatGlDXl5e1DFERNKKmf2rMu10KklEREpQYRARkRJUGEREpIS0vMaQNAUFMHUqbNgA27dDo0bQoQNcdx00q/D6jIhIjVQ7C8O6dTBpEixZEszv3v3tuuefhzvvhN69Ydw4OPvsaDKKiESk9p1KmjIFunWD+fODghBbFACKioJl8+cH7aZMiSKliEhkatcRw5QpcMstsGtXxW3dg3a33BLMjxlTtdlERKqJ2nPEsG7dQUVhDzAKaA0cDXQElpTerrg46L4JEaklak9hmDQpOE0UYx9wEpADbAfuBa4A3iu9bVFRsL2ISC2QlMJgZk+ZWYGZ/b2M9WZmj5hZvpltMLP/jFk33My2hK/hychzkIKC4EKze4nFRwJ3AW0IfhH9gJOB10tv7w6LF0NhYZXEExGpTpJ1xDAVuLic9b2BduHremAKgJk1Ae4EzgXOAe40s8ZJyhSTbmqlmn0CbAbOjLfSrNL7ERFJZ0kpDO6+Gvi8nCYDgac9kAscY2YtgP8Clrr75+6+DVhK+QXm8GzYcHDvo1L2AlcBw4HT4zUoKoKNG5MeTUSkuknVNYaWwL9j5j8Il5W1/CBmdr2Z5ZlZXuGhntLZvr3c1QeAa4B6wO/Ka7ht26G9r4hIGkqbi8/u/oS7Z7t7drNDvSu5UaOy90vQM+kTYB5wRHn7aZz8s1wiItVNqgrDhwQdgIqdGC4ra3lydegADRrEXTUGeAtYCGSWt4/MTDjrrKRHExGpblJVGBYA14a9kzoB2919K/Ai0MvMGocXnXuFy5JrxIi4i/8FPA6sB44HjgpfM+I1di9zPyIiNUlS7nw2s1lAN6CpmX1A0NPoCAB3/z2wGOgD5AO7gOvCdZ+b2T3AunBXE929vIvYh6d582Dso/nzS3RZbU1wKqlCZtCnjwbWE5Fawdwr9dFYrWRnZ/shP8Ft3bpg7KPKDIdRimdlYTk5kJ19yNuKiFQXZva6u1f4QZY2F58TdvbZ8OCDkJV1SJvtrlOHB48/nsLWrasomIhI9VJ7CgMEA+EVFwez
8tuaQVYW9R55hO3DhtGpUyfeeuut1OQUEYlQ7RpdFYLicPbZwdhHixcHBSB2DKXMzOA6RJ8+MG4cGdnZ3Au0a9eObt26MWPGDHr27BlZfBGRqlZ7rjHEU1gYDHOxcWNw81rjxkGX1BEj4l5ozsnJYciQIdxzzz2MHj068fcXEUmhyl5jqH1HDLGaNYNbb610865du/Lyyy/Tt29fNm/ezOTJk6lTp04VBhQRSb3adY0hCdq1a0dubi7r1q3jsssuY+fOnVFHEhFJKhWGw9CkSRNeeuklGjduzAUXXMCHHyb/Zm0RkaioMBymevXq8dRTT3H55ZfTuXNn/va3v0UdSUQkKVQYEmBmjB07loceeohevXqxcOHCqCOJiCSsdl98TpLBgwfTqlUrBg0aRH5+PjfffDNW0X0SIiLVlI4YkuScc87h1Vdf5amnnuKGG25g7969UUcSETksKgxJ1Lp1a9asWcN7771Hv3792F7BA4JERKojFYYka9iwIQsXLqRdu3b84Ac/4N133406kojIIVFhqAJ169bld7/7HT/60Y/4wQ9+wNq1a6OOJCJSaSoMVejHP/4xTz75JAMGDGD27NlRxxERqRT1Sqpiffv2ZdmyZfTv358tW7YwYcIE9VgSkWotKUcMZnaxmb1tZvlmNjbO+ofNbH342mxmX8Ss2x+zbkEy8lQ33/3ud3nttddYsGAB1157LXv27Ik6kohImRIuDGZWB3gU6A20B4aZWfvYNu7+U3fv6O4dgd8Cz8esLipe5+4DEs1TXbVo0YKcnBx27dpFz549+fTTT6OOJCISVzKOGM4B8t39HXf/GpgNDCyn/TBgVhLeN+1kZWUxd+5cunTpQqdOnfjHP/4RdSQRkYMkozC0BP4dM/9BuOwgZtYaOBlYEbO4gZnlmVmumV2ShDzVWkZGBpMnT2b8+PF07dqVFStWVLyRiEgKpbpX0lDgOXffH7OsdfjgiCuBX5vZf8Tb0MyuDwtIXmFhYSqyVqmRI0cye/Zshg0bxh//+Meo44iIfCMZheFD4KSY+RPDZfEMpdRpJHf/MPz5DrAK+F68Dd39CXfPdvfsZnGerpaOunfvzurVq5k0aRK33347Bw4ciDqSiEhSCsM6oJ2ZnWxm9Qg+/A/qXWRmpwONgbUxyxqbWf1wuinQBdiUhExp47TTTiM3N5e1a9dy+eWXs2vXrqgjiUgtl3BhcPd9wE3Ai8BbwBx3f9PMJppZbC+jocBsL/mQ6TOAPDN7A1gJTHb3WlUYAJo2bcrSpUs58sgj6dq1K1u3bo06kojUYlbyczo9ZGdne15eXtQxks7due+++3jiiSdYuHAh3/3ud6OOJCI1iJm9Hl7TLZeGxKhGzIwJEybwq1/9ip49e7Jo0aKoI4lILaTCUA0NGTKEBQsWMHr0aB555BHS8ahORNKXCkM11blzZ9asWcPjjz/OTTfdxL59+6KOJCK1hApDNXbyySfz6quvkp+fT//+/dmxY0fUkUSkFlBhqOYaNWrEokWLaNOmDV26dOFf//pX1JFEpIZTYUgDdevW5bHHHmPUqFF07tyZ1157LepIIlKDqTCkCTPj5ptv5vHHH6dfv37MmTMn6kgiUkPpQT1ppn///ixdupQBAwaQn5/PuHHj9OAfEUkqHTGkoY4dO5Kbm8u8efO47rrr9OAfEUkqFYY0dcIJJ7B69Wq2b99Or169+Oyzz6KOJCI1hApDGjvyyCOZN28e5557Lp06dWLz5s1RRxKRGkCFIc1lZGTwwAMPcNttt3H++eezatWqqCOJSJpTYaghRo8ezcyZM7niiiv405/+FHUcEUlj6pVUg/To0YOcnBz69evHli1buPfee8nIUO0XkUOjT40a5owzziA3N5fVq1czZMgQPfhHRA6ZCkMN1KxZM5YtW0a9evXo1q0bH3/8cdSRRCSNqDDUUA0aNGD69On069ePTp06sXHjxqgjiUiaSEphMLOLzextM8s3s7Fx1o8ws0Iz
Wx++fhizbriZbQlfw5ORRwJmxi9+8Qt++ctfcuGFF7JkyZKoI4lIGki4MJhZHeBRoDfQHhhmZu3jNH3W3TuGryfDbZsAdwLnAucAd5pZ40QzSUlXXnkl8+fPZ+TIkTz66KNRxxGRai4ZvZLOAfLd/R0AM5sNDAQ2VWLb/wKWuvvn4bZLgYuBWUnIJTG6dOnCmjVr6NevH5s3b+ahhx6iTp06JRsVFMDUqbBhA2zfDo0aQYcOcN110KxZJLlFJPWScSqpJfDvmPkPwmWlXWZmG8zsOTM76RC3lSQ45ZRTePXVV9m0aRMDBw7kyy+/DFasWweXXgqtW8Odd8KMGfCXvwQ/77oLWrUK1q9bF2l+EUmNVF18Xgi0cfcOwFJg2qHuwMyuN7M8M8srLCxMesDa4phjjmHx4sW0bNmS8847j8/uuw+6dYP582H37uAVq6goWDZ/ftBuypQoYotICiWjMHwInBQzf2K47Bvu/pm7Fw8B+iTw/cpuG7OPJ9w9292zm+m0RkKOOOIIfv/73/M/bduSOWEC7NoF7iXabAEaAFcXL3AP2t1yi4qDSA2XjMKwDmhnZiebWT1gKLAgtoGZtYiZHQC8FU6/CPQys8bhRede4TKpYpaXR88XXiCrjPU3AmfHW1FcHPLyqi6ciEQq4cLg7vuAmwg+0N8C5rj7m2Y20cwGhM1+YmZvmtkbwE+AEeG2nwP3EBSXdcDE4gvRUsUmTQpOE8UxGzgG6FHWtkVFwfYiUiOZlzqFkA6ys7M9T99YD19BQXChufT1BGAHkA2sIDjnlw9Mj7ePBg3g/ffVW0kkjZjZ6+6eXVE73flcG02dWuaqnwOjCC72lMus3P2ISPrS6Kq10YYNcY8W1gPLgL9VZh9FRaBhNkRqJBWG2mj79riLVwHvAa3C+a+A/QR3Kv413gbbtiU9mohET4WhNmrUKO7i6wm6lBV7kKBQlNk5tbFGLxGpiXSNoTbq0CG4eFxKFnB8zOsognsZ4l5ezsyEs86qwpAiEhUVhtpoxIhKNbuLMnokQXDDWyX3IyLpRYWhNmreHHr3DnoWHQY3gz591FVVpIZSYaitxo0LTgcdht3Av6++usJ2IpKeVBhqq7PPhgcfhKyyBsUoQ1YWecOGcfaYMbzyyitVk01EIqXCUJuNGfNtcajotJJZ0O7BBzl/xgyefvppLr30UmbMmJGarCKSMioMtd2YMZCTA4MGBT2VSp9eyswMlg8aFLQbMwaAXr16sWLFCiZMmMDEiRNJx6FVRCQ+jZUk3yosDIa52LgxuHmtceOgS+qIEWVeaP74448ZMGAAp59+On/4wx+oX79+SiOLSOVVdqwkFQZJ2K5du7jmmmv49NNPef755zn22GOjjiQicWgQPUmZrKws5s6dy7nnnkvnzp3ZsmVL1JFEJAEqDJIUGRkZPPDAA9x6662cf/75vPzyy1FHEpHDpMIgSTV69GieeeYZLrvsMqZPL/O+aRGpxjSIniTdRRddxMqVK+nXrx/5+fnceeed2GHeZS0iqZeUIwYzu9jM3jazfDMbG2f9z8xsk5ltMLPlZtY6Zt1+M1sfvhaU3lbS05lnnklubi5Llizh2muvZc+ePVFHEpFKSrgwmFkd4FGgN9AeGGZm7Us1+xuQ7e4dgOeAB2LWFbl7x/A1AKkxjjvuOFauXElRUREXXXQRn332WdSRRKQSknHEcA6Q7+7vuPvXBM+SHxjbwN1XuvuucDaXSjw5UmqGrKws5syZQ+fOndVjSSRNJKMwtAT+HTP/QbisLKOAJTHzDcwsz8xyzeySJOSRaiYjI4P777//mx5Lq1evjjqSiJQjpb2SzOxqIBv4Vczi1uENF1cCvzaz/yhj2+vDApJXWFiYgrSSbKNHj2b69OkMHjxYPZZEqrFkFIYPgZNi5k8Ml5VgZj2BO4AB7v7NlUh3/zD8+Q7BY4e/F+9N3P0Jd8929+xmeg5A2urZsycrV67k5z//OXfddZfGWBKphpJR
GNYB7czsZDOrR/DY4BK9i8zse8DjBEWhIGZ5YzOrH043BboQPHtearDiHksvvPAC11xzjXosiVQzCRcGd98H3AS8CLwFzHH3N81sopkV9zL6FcEjhOeW6pZ6BpBnZm8AK4HJ7q7CUAsU91jas2ePeiyJVDMaRE8ideDAAcaPH8+8efNYtGgRp556atSRRGosDaInaSEjI4PJkydz++23c8EFF6jHkkg1oMIg1cIPf/jDb3osPfPMM1HHEanVNFaSVBs9e/Zk1apV34yxdNddd2mMJZEI6IhBqpX27duzdu1aXnzxRa6++mp2794ddSSRWkeFQaqd4h5Le/fu5aKLLuLTTz+NOpJIraLCINVSZmYms2fP5rzzzqNz585s3rw56kgitYYKg1RbGRkZTJo0ibFjx3L++eeTk5MTdSSRWkGFQaq9UaNGMXPmTC6//HKefvrpqOOI1HjqlSRpoUePHiV6LN19993qsSRSRXTEIGmjffv25ObmsnTpUq666ir1WBKpIioMklaaN2/OihUr2LdvHz179kRDsIsknwqDpJ3iHksXXHABnTt35u233446kkiNosIgaSkjI4Nf/vKXjB8/ngsuuIBVq1ZFHUmkxlBhkLQ2cuRIZs6cyZAhQ9RjSSRJ1CtJ0l5xj6W+ffuyZcsWJk6cqB5LIgnQEYPUCGeccQa5ubksX76cK6+8Uj2WRBKgwiA1RvPmzVm+fDkHDhygR48e6rEkcpiSUhjM7GIze9vM8s1sbJz19c3s2XD9a2bWJmbduHD522b2X8nII7VXZmYms2bNolu3buqxJHKYEi4MZlYHeBToDbQHhplZ+1LNRgHb3L0t8DBwf7hte2AocCZwMfBYuD+Rw5aRkcF9993HHXfcoR5LIochGUcM5wD57v6Ou38NzAYGlmozEJgWTj8H9LDg6uBAYLa773H3d4H8cH8iCbvuuuuYNWsWQ4YMYdq0aRVvICJAcgpDS+DfMfMfhMvitnH3fcB24NhKbguAmV1vZnlmlqdzx1JZF154ITk5OUycOJEJEyZw4MCBqCOJVHtpc/HZ3Z9w92x3z27WrFnUcSSNnH766eTm5rJixQr1WBKphGQUhg+Bk2LmTwyXxW1jZnWBRsBnldxWJGHNmjVjxYoVQHAUoaNOkbIlozCsA9qZ2clmVo/gYvKCUm0WAMPD6cHACnf3cPnQsNfSyUA74P+SkEnkIA0aNGDmzJl0796dTp068Y9//CPqSCLVUsJ3Prv7PjO7CXgRqAM85e5vmtlEIM/dFwB/BJ4xs3zgc4LiQdhuDrAJ2Afc6O77E80kUpbiHktt27ala9euzJ49m+7du0cdS6RaseCLe3rJzs72vLy8qGNImlu5ciVDhw7l/vvvZ8SIEVHHEalyZva6u2dX1E5jJUmt1b17d3Jycr4ZY+mee+4hIyNt+mOIVBn9L5BarbjH0qpVq7jyyispKiqKOpJI5FQYpNZr1qwZy5cvB9AYSyKoMIgA3/ZYuvDCC+nUqRNvvfVW1JFEIqPCIBLKyMjg3nvv5ec//zndunX75r4HkdpGhUGklBEjRvDss88ybNgwnnrqqajjiKSceiWJxNGtWzdWr15N3759yc/P595771WPJak19JcuUobTTjuNtWvXkpOTw7Bhw9RjSWoNFQaRchT3WMrIyODCCy+koKAg6kgiVU6FQaQCDRo0YMaMGfTs2bP8HksFBfDAA3D11dC/f/DzgQdA3V8lzWhIDJFDMG3aNG699VZmzZpFjx49goXr1sGkSbBkSTAfO6x3Zia4Q+/eMG4cnH126kOLhCo7JIYKg8ghWrVqFUOGDGHSpEmM3LMHbrkFioqCAlAWs6BIPPggjBmTurAiMTRWkkgVKe6xNP288/j6iy+ot29fxRu5w65dQREBFQep1nSNQeQwnLZjBxN37jyoKFwNtAAaAqcCT5besLg46IhXqjEVBpHDMWkSFucRoeOA94AdBE+hmgC8XrpRUVFwTUKkmlJh
EDlUBQXBheY41xTOBOqH0xa+/lm6kTssXqzeSlJtqTCIHKqpU8tdfQOQBZxOcFqpT7xGZhXuRyQqCRUGM2tiZkvNbEv4s3GcNh3NbK2ZvWlmG8xsSMy6qWb2rpmtD18dE8kjkhIbNpTsklrKY8CXwMvApXx7BFFCURFs3Fgl8UQSlegRw1hgubu3A5aH86XtAq519zOBi4Ffm9kxMetvdfeO4Wt9gnlEqt727RU2qQOcB3wATCmjzTt//Stz5szh5ZdfJj8/n507dyYxpMjhS7S76kCgWzg9DVgF3B7bwN03x0x/ZGYFQDPgiwTfWyQajRpVuuk+4lxjCH26bx9z5sxh69atfPTRR2zdupX69evTokULTjjhBFq0aHHQdPH80UcfnZR/ikg8iRaG49x9azj9MXBceY3N7BygHiX/r9xnZr8gPOJw9z1lbHs9cD1Aq1atEowtkoAOHWDevINOJxUAK4B+QCawDJgVvg6Smck5o0bx3K23frPI3fniiy++KRLFr/fff5/XXnutRAExszKLRux0o0aNMLOq+k1IDVXhnc9mtgw4Ps6qO4Bp7n5MTNtt7n7QdYZwXQuCI4rh7p4bs+xjgmLxBPBPd59YUWjd+SyRKiiA1q0PKgyFwGDgDeAA0Br4CTA63j4aNID334dmzQ757d2dL7/8skShKGt67969lSogTZo0UQGpBZJ257O79yznTT4xsxbuvjX8kI879KSZNQQWAXcUF4Vw38VHG3vM7E/ALRXlEYlc8+bB2Efz55fostoMyKnM9mbQp89hFYVgc6Nhw4Y0bNiQ0047rdy2O3fujFs0Nm3aVGJ+165dHH/88eWexjrhhBNo2rSpnktRCyR6KmkBMByYHP78c+kGZlYP+F/gaXd/rtS64qJiwCXA3xPMI5Ia48bBiy8GdzIfqszMYPsUOPLII2nbti1t27Ytt11RUREff/zxQQVk9erVJeZ37NhB8+bNK7wO0rx5c+rW1Yg7h6ygIOjGvGFD0MmhUaPg1OV11x32F4nDkdAgemZ2LDAHaAX8C7jC3T83s2zgR+7+QzO7GvgT8GbMpiPcfb2ZrSD4omXA+nCbryp6X51Kkupg969/jf/3f5N54EDlN8rKSuuB9L7++uuDCki8I5LPP/+cpk2bVlhAjj/+eI444oio/1nRS9EIvRpdVaQK7du3j4EDBzLsiy+4av16TKOrlrB3714KCgoqLCCFhYU0bty4wusgLVq0oH79uHeEpL8pU1I2Qq9GVxWpIu7ODTfcwIEDBxiyahX2xhvBt73Fi4P/vLGPAC3+ttenT/BtL7vC/5M1whFHHEHLli1p2bJlue32799PYWHhQUXjzTffZNmyZd8s/+STTzj66KPLvPYRW0CysrJS9K9MguKiUJlTkikcoVdHDCKH6L777mPevHnk5OSUvJ+gsDA4P7xxI2zbBo0bw1lnwYgRKT0/XBMdOHCAzz77rNyeWMWvBg0aVKqARH4vyLp10K1biaLwO2AqsBEYFk7HlZUFOTmH/EVDp5JEqsD06dOZMGECa9eupUWLFlHHkVLcnW3btlVYQD766CPq1KlTqQJSZfeCXHrpQT3bnicYjuJFoIhyCoMZDBoU3E9zCFQYRJJsxYoVDBs2jJUrV9K+ffuo40gC3J0dO3aUWTRip/fv31/uXejF04d0L0gZ98IUm0AwnMrU8vZxGPfC6BqDSBJt3LiRoUOHMnfuXBWFGsDMaNSoEY0aNeL0008vt+1XX30Vt2j8/e9/L7F89+7dce8FKT3ftGlTMpIxsm7xCL0xd88niwqDSAU+/PBD+vbty29+8xu6du0adRxJsaOOOop27drRrl27ctsVFRXFLSA5OTkljkh27NjB7Lp1ubScEXorpQpH6FVhECnHjh076NOnDzfeeCPDhg2LOo5UY5mZmZxyyimccsop5bbbs2cP+/v0gRUrEn/TbdsS30ccKgwiZdi7dy+DBw+mS5cu3HbbbVHHkRqifv36
kKyOC43jDk2XMA16IhKHuzN69GgaNGjAI488ogHmJLk6dAguHpeyD9gN7A9fu8NlcWVmBt2hq4AKg0gcd999N5s2bWLWrFka80eSb8SIuIvvJRiyfTIwPZy+t6x9uJe5n0SpMIiU8tRTT/HMM8+wcOFCjjzyyKjjSE1UPEJvqSPRuwAv9bor3vYJjtBbERUGkRgvvvgi48ePZ/HixRx3XLnPnRJJzLhxwemgw1HFI/SqMIiE1q9fzzXXXMO8efMqfM6BSMLOPjsYEO9Qx3YqHqG3Csfd0slTEeD999+nf//+PPbYY3Tp0iXqOFJbFA+El6LRVStLRwxS633xxRf06dOHn/3sZwwePDjqOFLbjBkTDIg3aFDQU6n06aXMzGD5oEFBuxQM264jBqnV9uzZw6BBg+jRowc333xz1HGktsrODgbEqyYj9CZUGMysCfAs0AZ4j+AJbgfdimdm+wlGkgV4390HhMtPBmYDxwKvA9e4+9eJZBKpLHdn5MiRNG7cmIceekj3Kkj0mjWrkrGPDlWip5LGAsvdvR2wPJyPp8jdO4avATHL7wcedve2wDZgVIJ5RCrtjjvu4N1332XGjBnUqVMn6jgi1UaihWEgMC2cngZcUtkNLfh6diHw3OFsL5KIxx9/nOeee44FCxaQebhdBkVqqEQLw3HuvjWc/hgoq+N3AzPLM7NcMyv+8D8W+MLdi+/4/gAo/zmAIknwl7/8hbvuuovFixfTtGnTqOOIVDsVXmMws2XA8XFW3RE74+5uZmX1tWrt7h+a2SnACjPbCGw/lKBmdj1wPUCrVq0OZVORb+Tl5TFy5EgWLlxI27Zto44jUi1VWBjcvWdZ68zsEzNr4e5bzawFUFDGPj4Mf75jZquA7wHzgGPMrG541HAi8GE5OZ4AnoDgCW4V5RYp7d1332XAgAH84Q9/4Nxzz406jki1leippAXA8HB6OPDn0g3MrLGZ1Q+nmwJdgE0ePFN0JTC4vO1FkuGzzz6jd+/ejB8/noEDB0YdR6RaS7QwTAYuMrMtQM9wHjPLNrMnwzZnAHlm9gZBIZjs7pvCdbcDPzOzfIJrDn9MMI/IQXbv3s0ll1xC//79uemmm6KOI1LtmZd3C3Y1lZ2d7Xl5eVHHkDRw4MABhg0bhpkxc+ZMMjJ0s7/UXmb2urtXOMiS7nyWGu22225j69atvPTSSyoKIpWkwiA11m9/+1sWLVrEmjVraBDnaVkiEp8Kg9RI8+fPZ/LkyaxZs4YmTZpEHUckragwSI2Tm5vL6NGjeeGFF2jTpk3UcUTSjk66So2Sn5/PoEGDmDp1Kt///vejjiOSllQYpMYoLCykd+/e3H333fTt2zfqOCJpS4VBaoRdu3YxYMAArrjiCq6//vqo44ikNRUGSXv79+/nqquuom3bttx7771RxxFJe7r4LGnN3fnpT3/K9u3befbZZ/WwHZEkUGGQtPbwww+zYsUKXnnlFerVqxd1HJEaQYVB0tbcuXN5+OGHefXVVznmmGOijiNSY6gwSFp65ZVXuPHGG3nppZc46aSToo4jUqPo4rOknbfffpvBgwczffp0OnbsGHUckRpHhUHSyieffELv3r2ZPHkyvXr1ijoCAnzxAAAJhklEQVSOSI2kwiBpY+fOnfTr14/hw4czYsSIqOOI1FgqDJIW9u3bx9ChQznrrLP4xS9+EXUckRpNhUGqPXfnxz/+MV9//TWPP/647lUQqWIJFQYza2JmS81sS/izcZw23c1sfcxrt5ldEq6bambvxqzTlUQ5yP3338/atWuZO3cuRxxxRNRxRGq8RI8YxgLL3b0dsDycL8HdV7p7R3fvCFwI7AJeimlya/F6d1+fYB6pYWbOnMmUKVNYtGgRDRs2jDqOSK2QaGEYCEwLp6cBl1TQfjCwxN13Jfi+UgusWrWKm2++mUWLFtGyZcuo44jUGokWhuPcfWs4/TFwXAXthwKzSi27z8w2mNnDZlY/wTxSQ7z55psMGTKE
Z599lu985ztRxxGpVSq889nMlgHHx1l1R+yMu7uZeTn7aQGcBbwYs3gcQUGpBzwB3A5MLGP764HrAVq1alVRbKmuCgpg6lTYsAG2b4dGjaBDB7juOmjWDICPPvqIvn378tBDD9G9e/do84rUQuZe5md5xRubvQ10c/et4Qf/Knc/rYy2/w84093jDpZvZt2AW9y9X0Xvm52d7Xl5eYedWyKwbh1MmgRLlgTzu3d/uy4zE9yhd292/uQnnPfTn3L55Zczfvz4aLKK1FBm9rq7Z1fULtFTSQuA4eH0cODP5bQdRqnTSGExwYL+h5cAf08wj1RHU6ZAt24wf35QEGKLAkBREezejc+fT50ePbjlqKMYN25cJFFFJPHCMBm4yMy2AD3Decws28yeLG5kZm2Ak4CcUtvPMLONwEagKaCnrNQ0U6bALbfArl3BUUE5zJ0GBw5w5V//iv3+9ykKKCKlJXQqKSo6lZQm1q0LjhR2leyE9jkwiqDPclNgEnBl6W2zsiAnB7IrPOoVkUpK1akkkbJNmhScJirlRoLeBp8AM4AxwJulGxUVBduLSMqpMEjVKCgILjSXOiLdCcwD7gGOAs4DBgDPlN7eHRYvhsLCFIQVkVgqDFI1pk6Nu3gzQR/pU2OWfZc4RwwAZmXuR0SqjgqDVI0NGw7ufQR8BZQe2KIR8GW8fRQVwcaNyc8mIuVSYZCqsX173MVHATtKLdsBHF3WfrZtS14mEakUFQapGo0axV18KrAP2BKz7A3gzLL20/igAXtFpIqpMEjV6NABGjQ4aPGRwKXALwguRK8huCvymnj7yMyEs86qwpAiEo8Kg1SNch69+RhQBDQnuB1+CmUcMbiXux8RqRoqDFI1mjeH3r2DnkWlNAHmExwxvE+cm9sg2K5Pn28G1hOR1FFhkKozblxwOuhwZGYG24tIyqkwSNU5+2x48MFgeItDkZUVbKfhMEQiUeHzGEQSMmZM8POWW4L7Esobm8ssOFJ48MFvtxORlNMRg1S9MWOCAfEGDQp6KpU+vZSZGSwfNChop6IgEikdMUhqZGfDvHnB2EdTpwZ3NG/bFtyncNZZQe8jXWgWqRZUGCS1mjWDW2+NOoWIlEOnkkREpAQVBhERKUGFQURESkjLR3uaWSHwrzJWNwU+TWGcZFDm1Ei3zOmWF5Q5VQ43c2t3r7CXR1oWhvKYWV5lnmlanShzaqRb5nTLC8qcKlWdWaeSRESkBBUGEREpoSYWhieiDnAYlDk10i1zuuUFZU6VKs1c464xiIhIYmriEYOIiCQg7QuDmTUxs6VmtiX8GfchwWbWysxeMrO3zGyTmbVJbdISWSqVOWzb0Mw+MLPfpTJjnBwVZjazjma21szeNLMNZjYkgpwXm9nbZpZvZmPjrK9vZs+G61+L8u8gJlNFmX8W/s1uMLPlZtY6ipylMpWbOabdZWbmZhZ5r5/KZDazK8Lf9ZtmNjPVGePkqehvo5WZrTSzv4V/H32S8sbuntYv4AFgbDg9Fri/jHargIvC6aOArOqeOVz/G2Am8Lvq/nsGTgXahdMnAFuBY1KYsQ7wT+AUoB7wBtC+VJsbgN+H00OBZyP+vVYmc/fiv1dgTDpkDtsdDawGcoHs6p4ZaAf8DWgczjdPg8xPAGPC6fbAe8l477Q/YgAGAtPC6WnAJaUbmFl7oK67LwVw96/cfVfqIh6kwswAZvZ94DjgpRTlKk+Fmd19s7tvCac/AgqAVA6Zeg6Q7+7vuPvXwGyC3LFi/x3PAT3M4jx/NHUqzOzuK2P+XnOBE1OcsbTK/J4B7gHuB3anMlwZKpN5NPCou28DcPeCFGcsrTKZHWgYTjcCPkrGG9eEwnCcu28Npz8m+CAt7VTgCzN7Pjzk+pWZ1UldxINUmNnMMoD/AW5JZbByVOb3/A0zO4fgW84/qzpYjJbAv2PmPwiXxW3j7vuA7cCxKUkXX2UyxxoFLKnSRBWrMLOZ/SdwkrsvSmWwclTm93wqcKqZ
rTGzXDO7OGXp4qtM5ruAq83sA2Ax8ONkvHFaDLttZsuA4+OsuiN2xt3dzOJ1s6oLnA98j+D5888CI4A/Jjfpt5KQ+QZgsbt/kKovtEnIXLyfFsAzwHB3P5DclLWXmV0NZANdo85SnvBLzUME/8fSSV2C00ndCI7KVpvZWe7+RaSpyjcMmOru/2NmnYFnzOw7if6/S4vC4O49y1pnZp+YWQt33xp+IMU7/PsAWO/u74TbzAc6UYWFIQmZOwPnm9kNBNdE6pnZV+5e5oW+RCUhM2bWEFgE3OHuuVUUtSwfAifFzJ8YLovX5gMzq0tw+P1ZauLFVZnMmFlPggLd1d33pChbWSrKfDTwHWBV+KXmeGCBmQ1w97yUpSypMr/nD4DX3H0v8K6ZbSYoFOtSE/Eglck8CrgYwN3XmlkDgnGUEjoNVhNOJS0AhofTw4E/x2mzDjjGzIrPd18IbEpBtrJUmNndr3L3Vu7ehuB00tNVWRQqocLMZlYP+F+CrM+lMFuxdUA7Mzs5zDKUIHes2H/HYGCFh1fuIlJhZjP7HvA4MKAanPeGCjK7+3Z3b+rubcK/31yC7FEVBajc38Z8gqMFzKwpwamld1IZspTKZH4f6AFgZmcADYDChN85yqvuyXgRnB9eDmwBlgFNwuXZwJMx7S4CNgAbgalAveqeOab9CKLvlVRhZuBqYC+wPubVMcU5+wCbCa5t3BEum0jwwUT4H2cukA/8H3BKlL/XSmZeBnwS8ztdUN0zl2q7ioh7JVXy92wEp8A2hZ8TQ9Mgc3tgDUGPpfVAr2S8r+58FhGREmrCqSQREUkiFQYRESlBhUFEREpQYRARkRJUGEREpAQVBhERKUGFQURESlBhEBGREv4/vYcUKfE34CAAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, {'atom': 'C', 'label': 'C'}), (1, {'atom': 'C', 'label': 'C'}), (2, {'atom': 'C', 'label': 'C'}), (3, {'atom': 'C', 'label': 'C'}), (4, {'atom': 'S', 'label': 'S'})]\n", + "\n", + " --- kernel matrix of tree pattern kernel of size 39 built in 3.5270774364471436 seconds ---\n", + "(array([[1.99007809e+036, 4.00000000e+000, 4.00000000e+000, ...,\n", + " 1.00000000e+001, 1.00000000e+001, 1.00000000e+001],\n", + " [4.00000000e+000, 6.37886713e+019, 4.34000000e+002, ...,\n", + " 6.37886713e+019, 6.37886713e+019, 6.37886713e+019],\n", + " [4.00000000e+000, 4.34000000e+002, 1.99007809e+036, ...,\n", + " 4.40000000e+002, 4.40000000e+002, 4.40000000e+002],\n", + " ...,\n", + " [1.00000000e+001, 6.37886713e+019, 4.40000000e+002, ...,\n", + " 2.94561201e+119, 1.16903692e+080, 4.42354433e+082],\n", + " [1.00000000e+001, 6.37886713e+019, 4.40000000e+002, ...,\n", + " 1.16903692e+080, 4.21212139e+264, 1.66634383e+080],\n", + " 
[1.00000000e+001, 6.37886713e+019, 4.40000000e+002, ...,\n", + " 4.42354433e+082, 1.66634383e+080, 5.17763068e+117]]), 3.5270774364471436)\n" + ] + } + ], + "source": [ + "%load_ext line_profiler\n", + "\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.graphfiles import loadDataset\n", + "from pygraph.utils.utils import kernel_train_test\n", + "\n", + "from pygraph.kernels.treePatternKernel import treepatternkernel, _treepatternkernel_do\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "\n", + "dataset, y = loadDataset(datafile)\n", + "G1 = dataset[100]\n", + "G2 = dataset[20]\n", + "data = [G1, G2]\n", + "# nx.draw_networkx(G1)\n", + "# plt.show()\n", + "# print(G1.nodes(data=True)20\n", + "nx.draw_networkx(G2)\n", + "plt.show()\n", + "print(G2.nodes(data=True))\n", + "\n", + "\n", + "%lprun -f _treepatternkernel_do \\\n", + " kernel = treepatternkernel(dataset[1:40], node_label = 'atom', edge_label = 'bond_type', labeled = True, \\\n", + " kernel_type = 'untiln', lmda = 1, h = 10)\n", + "\n", + "print(kernel)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- This is a classification problem ---\n", + "\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "retrieve patterns: 100%|██████████| 185/185 [00:00<00:00, 2064.69it/s]\n", + "calculate kernels: 100%|██████████| 185/185 [00:00<00:00, 11170.00it/s]\n", + "\n", + " --- kernel matrix of cyclic pattern kernel of size 185 built in 0.10836505889892578 seconds ---\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 
0.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|██████████| 1000/1000 [00:24<00:00, 36.41it/s]\n", + " Mean performance on train set: 0.018072\n", + "With standard deviation: 0.000000\n", + "\n", + " Mean performance on test set: 0.000000\n", + "With standard deviation: 0.000000\n", + "\n", + "\n", + " accur_test std_test accur_train std_train k_time\n", + "------------ ---------- ------------- ----------- --------\n", + " 0 0 0.0180723 0 0.108365\n" + ] + } + ], + "source": [ + "%load_ext line_profiler\n", + "\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n", + "\n", + "import numpy as np\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_path_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n", + "\n", + "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", + " normalize = False , model_type = 'classification')\n", + "\n", + "# kernel_para['k_func'] = 'minmax'\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = True)\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n", + "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "# # kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)\n", + "\n", + "# kernel_para['depth'] = 10\n", + "# %lprun -f untildpathkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)" + 
] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/run_weisfeilerLehmankernel_acyclic.ipynb b/notebooks/run_weisfeilerLehmankernel_acyclic.ipynb index 1665c60..0dbb8f9 100644 --- a/notebooks/run_weisfeilerLehmankernel_acyclic.ipynb +++ b/notebooks/run_weisfeilerLehmankernel_acyclic.ipynb @@ -19,22 +19,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.38979601860046387 seconds ---\n", - "[[ 5. 6. 4. ..., 20. 20. 20.]\n", - " [ 6. 8. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 5. ..., 21. 21. 21.]\n", - " ..., \n", - " [ 20. 20. 21. ..., 101. 101. 101.]\n", - " [ 20. 20. 21. ..., 101. 101. 101.]\n", - " [ 20. 20. 21. ..., 101. 101. 101.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3646550178527832 seconds ---\n", + "[[ 5. 6. 4. ... 20. 20. 20.]\n", + " [ 6. 8. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 5. ... 21. 21. 21.]\n", + " ...\n", + " [ 20. 20. 21. ... 101. 101. 101.]\n", + " [ 20. 20. 21. ... 101. 101. 101.]\n", + " [ 20. 20. 21. ... 101. 101. 
101.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 664.77it/s]\n", " Mean performance on train set: 17.681582\n", "With standard deviation: 0.713183\n", "\n", " Mean performance on test set: 15.685879\n", "With standard deviation: 4.139197\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 681.36it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 1.0 ---#\n", @@ -43,22 +44,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.8205692768096924 seconds ---\n", - "[[ 10. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 16. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 10. ..., 22. 22. 24.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 24. ..., 122. 122. 154.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.7535510063171387 seconds ---\n", + "[[ 10. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 16. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 10. ... 22. 22. 24.]\n", + " ...\n", + " [ 20. 20. 22. ... 130. 130. 122.]\n", + " [ 20. 20. 22. ... 130. 130. 122.]\n", + " [ 20. 20. 24. ... 122. 122. 
154.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▍| 945/1000 [00:01<00:00, 713.00it/s]\n", " Mean performance on train set: 6.270014\n", "With standard deviation: 0.654734\n", "\n", " Mean performance on test set: 7.550458\n", "With standard deviation: 2.331786\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 719.46it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 2.0 ---#\n", @@ -67,22 +69,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.375309705734253 seconds ---\n", - "[[ 15. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 24. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 15. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 159. 151. 124.]\n", - " [ 20. 20. 22. ..., 151. 153. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 185.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.3278343677520752 seconds ---\n", + "[[ 15. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 24. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 15. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 159. 151. 124.]\n", + " [ 20. 20. 22. ... 151. 153. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
185.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 949/1000 [00:01<00:00, 736.38it/s]\n", " Mean performance on train set: 4.450682\n", "With standard deviation: 0.882129\n", "\n", " Mean performance on test set: 9.728466\n", "With standard deviation: 2.057669\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 709.22it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 3.0 ---#\n", @@ -91,22 +94,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.8636789321899414 seconds ---\n", - "[[ 20. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 32. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 20. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 188. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 168. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 202.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.7653727531433105 seconds ---\n", + "[[ 20. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 32. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 20. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 188. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 168. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
202.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 96%|█████████▌| 959/1000 [00:01<00:00, 724.60it/s]\n", " Mean performance on train set: 2.270586\n", "With standard deviation: 0.481516\n", "\n", " Mean performance on test set: 11.296110\n", "With standard deviation: 2.799944\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 670.29it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 4.0 ---#\n", @@ -115,22 +119,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.5077457427978516 seconds ---\n", - "[[ 25. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 40. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 25. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 217. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 183. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 213.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.2821996212005615 seconds ---\n", + "[[ 25. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 40. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 25. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 217. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 183. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
213.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 709.28it/s]\n", " Mean performance on train set: 1.074035\n", "With standard deviation: 0.637823\n", "\n", " Mean performance on test set: 12.808303\n", "With standard deviation: 3.446939\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 646.12it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 5.0 ---#\n", @@ -139,22 +144,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.8235607147216797 seconds ---\n", - "[[ 30. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 48. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 30. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 246. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 198. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 224.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.706934928894043 seconds ---\n", + "[[ 30. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 48. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 30. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 246. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 198. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
224.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 953/1000 [00:01<00:00, 553.49it/s]\n", " Mean performance on train set: 0.700602\n", "With standard deviation: 0.572640\n", "\n", " Mean performance on test set: 14.017923\n", "With standard deviation: 3.675042\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 621.01it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 6.0 ---#\n", @@ -163,22 +169,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.458494186401367 seconds ---\n", - "[[ 35. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 56. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 35. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 275. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 213. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 235.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.1140964031219482 seconds ---\n", + "[[ 35. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 56. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 35. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 275. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 213. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
235.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|█████████▉| 997/1000 [00:01<00:00, 595.50it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 14.918434\n", "With standard deviation: 3.805352\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 586.05it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 7.0 ---#\n", @@ -187,22 +194,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.861224889755249 seconds ---\n", - "[[ 40. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 64. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 40. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 304. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 228. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 246.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.5894455909729004 seconds ---\n", + "[[ 40. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 64. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 40. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 304. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 228. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
246.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 991/1000 [00:01<00:00, 663.55it/s]\n", " Mean performance on train set: 0.691516\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 15.629476\n", "With standard deviation: 3.865387\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 627.59it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 8.0 ---#\n", @@ -211,22 +219,23 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.295838117599487 seconds ---\n", - "[[ 45. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 72. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 45. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 333. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 243. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 257.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.081295967102051 seconds ---\n", + "[[ 45. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 72. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 45. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 333. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 243. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
257.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 96%|█████████▌| 961/1000 [00:01<00:00, 601.33it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 16.214369\n", "With standard deviation: 3.928756\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 603.90it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 9.0 ---#\n", @@ -235,65 +244,74 @@ "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.008287668228149 seconds ---\n", - "[[ 50. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 80. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 50. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 362. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 258. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 268.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.497286796569824 seconds ---\n", + "[[ 50. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 80. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 50. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 362. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 258. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
268.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 93%|█████████▎| 931/1000 [00:01<00:00, 511.55it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 16.725744\n", "With standard deviation: 3.993095\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 550.66it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " Calculating kernel matrix, this could take a while...\n", - "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.347799301147461 seconds ---\n", - "[[ 55. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 88. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 55. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 391. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 273. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 279.]]\n", - "\n", - " Saving kernel matrix to file...\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\n", + " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.984841585159302 seconds ---\n", + "[[ 55. 10. 4. ... 20. 20. 20.]\n", + " [ 10. 88. 4. ... 20. 20. 20.]\n", + " [ 4. 4. 55. ... 22. 22. 26.]\n", + " ...\n", + " [ 20. 20. 22. ... 391. 159. 124.]\n", + " [ 20. 20. 22. ... 159. 273. 124.]\n", + " [ 20. 20. 26. ... 124. 124. 
279.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▍| 942/1000 [00:01<00:00, 708.78it/s]\n", " Mean performance on train set: 0.691516\n", "With standard deviation: 0.564621\n", "\n", " Mean performance on test set: 17.186401\n", "With standard deviation: 4.056724\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 711.43it/s]\n", "\n", "\n", - " height RMSE_test std_test RMSE_train std_train k_time\n", + " height rmse_test std_test rmse_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- --------\n", - " 0 15.6859 4.1392 17.6816 0.713183 0.389796\n", - " 1 7.55046 2.33179 6.27001 0.654734 0.820569\n", - " 2 9.72847 2.05767 4.45068 0.882129 1.37531\n", - " 3 11.2961 2.79994 2.27059 0.481516 1.86368\n", - " 4 12.8083 3.44694 1.07403 0.637823 2.50775\n", - " 5 14.0179 3.67504 0.700602 0.57264 2.82356\n", - " 6 14.9184 3.80535 0.691515 0.56462 3.45849\n", - " 7 15.6295 3.86539 0.691516 0.56462 3.86122\n", - " 8 16.2144 3.92876 0.691515 0.56462 4.29584\n", - " 9 16.7257 3.9931 0.691515 0.56462 5.00829\n", - " 10 17.1864 4.05672 0.691516 0.564621 5.3478\n" + " 0 15.6859 4.1392 17.6816 0.713183 0.364655\n", + " 1 7.55046 2.33179 6.27001 0.654734 0.753551\n", + " 2 9.72847 2.05767 4.45068 0.882129 1.32783\n", + " 3 11.2961 2.79994 2.27059 0.481516 1.76537\n", + " 4 12.8083 3.44694 1.07403 0.637823 2.2822\n", + " 5 14.0179 3.67504 0.700602 0.57264 2.70693\n", + " 6 14.9184 3.80535 0.691515 0.56462 3.1141\n", + " 7 15.6295 3.86539 0.691516 0.56462 3.58945\n", + " 8 16.2144 3.92876 0.691515 0.56462 4.0813\n", + " 9 16.7257 3.9931 0.691515 0.56462 4.49729\n", + " 10 17.1864 4.05672 0.691516 0.564621 4.98484\n" ] } ], "source": [ + "# wl subtree kernel\n", "%load_ext line_profiler\n", "\n", "import numpy as np\n", @@ -317,480 +335,744 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], - "source": [ - "# results\n", 
- "\n", - "# with y normalization\n", - " height RMSE_test std_test RMSE_train std_train k_time\n", - "-------- ----------- ---------- ------------ ----------- --------\n", - " 0 36.2108 7.33179 38.6059 1.57064 0.379475\n", - " 1 9.00098 6.37145 6.76379 1.96568 0.844898\n", - " 2 19.8113 4.04911 5.28757 1.81899 1.35308\n", - " 3 25.0455 4.94276 2.3274 0.805733 1.81136\n", - " 4 28.2255 6.5212 0.85156 0.423465 2.23098\n", - " 5 30.6354 6.73647 3.35947 8.17561 2.71575\n", - " 6 32.1027 6.85601 3.54105 8.71922 3.11459\n", - " 7 32.9709 6.89606 6.94372 9.94045 3.55571\n", - " 8 33.5112 6.90753 6.97339 9.76975 3.79657\n", - " 9 33.8502 6.91427 11.8345 11.6213 4.41555\n", - " 10 34.0963 6.93115 11.4257 11.2624 4.94888\n", - "\n", - "# without y normalization\n", - " height RMSE_test std_test RMSE_train std_train k_time\n", - "-------- ----------- ---------- ------------ ----------- --------\n", - " 0 15.6859 4.1392 17.6816 0.713183 0.360443\n", - " 1 7.55046 2.33179 6.27001 0.654734 0.837389\n", - " 2 9.72847 2.05767 4.45068 0.882129 1.25317\n", - " 3 11.2961 2.79994 2.27059 0.481516 1.79971\n", - " 4 12.8083 3.44694 1.07403 0.637823 2.35346\n", - " 5 14.0179 3.67504 0.700602 0.57264 2.78285\n", - " 6 14.9184 3.80535 0.691515 0.56462 3.20764\n", - " 7 15.6295 3.86539 0.691516 0.56462 3.71648\n", - " 8 16.2144 3.92876 0.691515 0.56462 3.99213\n", - " 9 16.7257 3.9931 0.691515 0.56462 4.26315\n", - " 10 17.1864 4.05672 0.691516 0.564621 5.00918" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "scrolled": true - }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in 
training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. \n", + " --- This is a regression problem ---\n", "\n", - "@references\n", - " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", "\n", + " #--- calculating kernel matrix when height = 0.0 ---#\n", "\n", + " Loading dataset from file...\n", "\n", - " #--- calculating kernel matrix when subtree height = 0 ---#\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " Loading dataset from file...\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 13.504083633422852 seconds ---\n", + "[[ 3. 1. 3. ... 1. 1. 1.]\n", + " [ 1. 6. 1. ... 0. 0. 3.]\n", + " [ 3. 1. 3. ... 1. 1. 1.]\n", + " ...\n", + " [ 1. 0. 1. ... 55. 21. 7.]\n", + " [ 1. 0. 1. ... 21. 55. 7.]\n", + " [ 1. 3. 1. ... 7. 7. 55.]]\n", "\n", - " --- This is a regression problem ---\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 773.79it/s]\n", + " Mean performance on train set: 28.360361\n", + "With standard deviation: 1.357183\n", "\n", - " Calculating kernel matrix, this could take a while...\n", + " Mean performance on test set: 35.191954\n", + "With standard deviation: 4.495767\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 743.82it/s]\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3920705318450928 seconds ---\n", - "[[ 5. 6. 4. ..., 20. 20. 20.]\n", - " [ 6. 8. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 5. ..., 21. 21. 21.]\n", - " ..., \n", - " [ 20. 20. 21. ..., 101. 101. 
101.]\n", - " [ 20. 20. 21. ..., 101. 101. 101.]\n", - " [ 20. 20. 21. ..., 101. 101. 101.]]\n", "\n", - " Saving kernel matrix to file...\n", + " #--- calculating kernel matrix when height = 1.0 ---#\n", "\n", - " Mean performance on train set: 17.681582\n", - "With standard deviation: 0.713183\n", + " Loading dataset from file...\n", "\n", - " Mean performance on test set: 15.685879\n", - "With standard deviation: 4.139197\n", + " Calculating kernel matrix, this could take a while...\n", "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 26.82917618751526 seconds ---\n", + "[[ 6. 2. 6. ... 2. 2. 2.]\n", + " [ 2. 12. 2. ... 0. 0. 6.]\n", + " [ 6. 2. 6. ... 2. 2. 2.]\n", + " ...\n", + " [ 2. 0. 2. ... 110. 42. 14.]\n", + " [ 2. 0. 2. ... 42. 110. 14.]\n", + " [ 2. 6. 2. ... 14. 14. 110.]]\n", "\n", - " #--- calculating kernel matrix when subtree height = 1 ---#\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 751.78it/s]\n", + " Mean performance on train set: 27.933534\n", + "With standard deviation: 1.448359\n", "\n", - " Loading dataset from file...\n", + " Mean performance on test set: 35.180815\n", + "With standard deviation: 4.500453\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 744.44it/s]\n", "\n", - " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", + " #--- calculating kernel matrix when height = 2.0 ---#\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.8578901290893555 seconds ---\n", - "[[ 10. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 16. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 10. ..., 22. 22. 24.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 22. ..., 130. 130. 122.]\n", - " [ 20. 20. 24. ..., 122. 122. 
154.]]\n", + " Loading dataset from file...\n", "\n", - " Saving kernel matrix to file...\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " Mean performance on train set: 6.270014\n", - "With standard deviation: 0.654734\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 40.235626220703125 seconds ---\n", + "[[ 9. 3. 9. ... 3. 3. 3.]\n", + " [ 3. 18. 3. ... 0. 0. 9.]\n", + " [ 9. 3. 9. ... 3. 3. 3.]\n", + " ...\n", + " [ 3. 0. 3. ... 165. 63. 21.]\n", + " [ 3. 0. 3. ... 63. 165. 21.]\n", + " [ 3. 9. 3. ... 21. 21. 165.]]\n", "\n", - " Mean performance on test set: 7.550458\n", - "With standard deviation: 2.331786\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 694.10it/s]\n", + " Mean performance on train set: 28.111311\n", + "With standard deviation: 1.508915\n", "\n", + " Mean performance on test set: 35.163150\n", + "With standard deviation: 4.502054\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 695.02it/s]\n", "\n", - " #--- calculating kernel matrix when subtree height = 2 ---#\n", "\n", - " Loading dataset from file...\n", + " #--- calculating kernel matrix when height = 3.0 ---#\n", "\n", - " --- This is a regression problem ---\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.264050006866455 seconds ---\n", - "[[ 15. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 24. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 15. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 159. 151. 124.]\n", - " [ 20. 20. 22. ..., 151. 153. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 185.]]\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 54.67040753364563 seconds ---\n", + "[[ 12. 4. 12. ... 4. 4. 4.]\n", + " [ 4. 24. 4. ... 0. 0. 12.]\n", + " [ 12. 4. 12. ... 4. 4. 4.]\n", + " ...\n", + " [ 4. 0. 4. ... 
220. 84. 28.]\n", + " [ 4. 0. 4. ... 84. 220. 28.]\n", + " [ 4. 12. 4. ... 28. 28. 220.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 748.03it/s]\n", + " Mean performance on train set: 28.390274\n", + "With standard deviation: 1.365711\n", "\n", - " Mean performance on train set: 4.450682\n", - "With standard deviation: 0.882129\n", - "\n", - " Mean performance on test set: 9.728466\n", - "With standard deviation: 2.057669\n", + " Mean performance on test set: 35.194634\n", + "With standard deviation: 4.498007\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 726.68it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when subtree height = 3 ---#\n", + " #--- calculating kernel matrix when height = 4.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", - "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.731236219406128 seconds ---\n", - "[[ 20. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 32. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 20. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 188. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 168. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 202.]]\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 67.15217232704163 seconds ---\n", + "[[ 15. 5. 15. ... 5. 5. 5.]\n", + " [ 5. 30. 5. ... 0. 0. 15.]\n", + " [ 15. 5. 15. ... 5. 5. 5.]\n", + " ...\n", + " [ 5. 0. 5. ... 275. 105. 35.]\n", + " [ 5. 0. 5. ... 105. 275. 35.]\n", + " [ 5. 15. 5. ... 35. 35. 
275.]]\n", "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on train set: 2.270586\n", - "With standard deviation: 0.481516\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 950/1000 [00:01<00:00, 737.07it/s]\n", + " Mean performance on train set: 27.974611\n", + "With standard deviation: 1.462223\n", "\n", - " Mean performance on test set: 11.296110\n", - "With standard deviation: 2.799944\n", + " Mean performance on test set: 35.175314\n", + "With standard deviation: 4.501113\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 719.71it/s]\n", "\n", "\n", - " #--- calculating kernel matrix when subtree height = 4 ---#\n", + " #--- calculating kernel matrix when height = 5.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", - "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.1112847328186035 seconds ---\n", - "[[ 25. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 40. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 25. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 217. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 183. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 213.]]\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 80.08806300163269 seconds ---\n", + "[[ 18. 6. 18. ... 6. 6. 6.]\n", + " [ 6. 36. 6. ... 0. 0. 18.]\n", + " [ 18. 6. 18. ... 6. 6. 6.]\n", + " ...\n", + " [ 6. 0. 6. ... 330. 126. 42.]\n", + " [ 6. 0. 6. ... 126. 330. 42.]\n", + " [ 6. 18. 6. ... 42. 42. 
330.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 735.71it/s]\n", + " Mean performance on train set: 28.018415\n", + "With standard deviation: 1.455644\n", "\n", - " Mean performance on train set: 1.074035\n", - "With standard deviation: 0.637823\n", + " Mean performance on test set: 35.199713\n", + "With standard deviation: 4.507104\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 738.55it/s]\n", "\n", - " Mean performance on test set: 12.808303\n", - "With standard deviation: 3.446939\n", "\n", + " #--- calculating kernel matrix when height = 6.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " #--- calculating kernel matrix when subtree height = 5 ---#\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 92.19254112243652 seconds ---\n", + "[[ 21. 7. 21. ... 7. 7. 7.]\n", + " [ 7. 42. 7. ... 0. 0. 21.]\n", + " [ 21. 7. 21. ... 7. 7. 7.]\n", + " ...\n", + " [ 7. 0. 7. ... 385. 147. 49.]\n", + " [ 7. 0. 7. ... 147. 385. 49.]\n", + " [ 7. 21. 7. ... 49. 49. 385.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 721.42it/s]\n", + " Mean performance on train set: 28.373079\n", + "With standard deviation: 1.600565\n", + "\n", + " Mean performance on test set: 35.164471\n", + "With standard deviation: 4.498487\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 727.58it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 7.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 105.81170415878296 seconds ---\n", + "[[ 24. 8. 24. ... 8. 8. 
8.]\n", + " [ 8. 48. 8. ... 0. 0. 24.]\n", + " [ 24. 8. 24. ... 8. 8. 8.]\n", + " ...\n", + " [ 8. 0. 8. ... 440. 168. 56.]\n", + " [ 8. 0. 8. ... 168. 440. 56.]\n", + " [ 8. 24. 8. ... 56. 56. 440.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 97%|█████████▋| 968/1000 [00:01<00:00, 739.67it/s]\n", + " Mean performance on train set: 27.960421\n", + "With standard deviation: 1.457425\n", + "\n", + " Mean performance on test set: 35.177115\n", + "With standard deviation: 4.500904\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 733.61it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 8.0 ---#\n", + "\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.4751319885253906 seconds ---\n", - "[[ 30. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 48. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 30. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 246. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 198. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 224.]]\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 119.0216612815857 seconds ---\n", + "[[ 27. 9. 27. ... 9. 9. 9.]\n", + " [ 9. 54. 9. ... 0. 0. 27.]\n", + " [ 27. 9. 27. ... 9. 9. 9.]\n", + " ...\n", + " [ 9. 0. 9. ... 495. 189. 63.]\n", + " [ 9. 0. 9. ... 189. 495. 63.]\n", + " [ 9. 27. 9. ... 63. 63. 
495.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 93%|█████████▎| 931/1000 [00:01<00:00, 752.10it/s]\n", + " Mean performance on train set: 28.199059\n", + "With standard deviation: 1.514897\n", "\n", - " Mean performance on train set: 0.700602\n", - "With standard deviation: 0.572640\n", + " Mean performance on test set: 35.196848\n", + "With standard deviation: 4.505256\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 768.54it/s]\n", "\n", - " Mean performance on test set: 14.017923\n", - "With standard deviation: 3.675042\n", "\n", + " #--- calculating kernel matrix when height = 9.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 131.22810459136963 seconds ---\n", + "[[ 30. 10. 30. ... 10. 10. 10.]\n", + " [ 10. 60. 10. ... 0. 0. 30.]\n", + " [ 30. 10. 30. ... 10. 10. 10.]\n", + " ...\n", + " [ 10. 0. 10. ... 550. 210. 70.]\n", + " [ 10. 0. 10. ... 210. 550. 70.]\n", + " [ 10. 30. 10. ... 70. 70. 
550.]]\n", "\n", - " #--- calculating kernel matrix when subtree height = 6 ---#\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 93%|█████████▎| 932/1000 [00:01<00:00, 763.55it/s]\n", + " Mean performance on train set: 28.266520\n", + "With standard deviation: 1.307686\n", + "\n", + " Mean performance on test set: 35.195635\n", + "With standard deviation: 4.501972\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 764.12it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", + " Calculating kernel matrix, this could take a while...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 144.96362161636353 seconds ---\n", + "[[ 33. 11. 33. ... 11. 11. 11.]\n", + " [ 11. 66. 11. ... 0. 0. 33.]\n", + " [ 33. 11. 33. ... 11. 11. 11.]\n", + " ...\n", + " [ 11. 0. 11. ... 605. 231. 77.]\n", + " [ 11. 0. 11. ... 231. 605. 77.]\n", + " [ 11. 33. 11. ... 77. 77. 
605.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 100%|█████████▉| 996/1000 [00:01<00:00, 820.73it/s]\n", + " Mean performance on train set: 28.416280\n", + "With standard deviation: 1.615957\n", + "\n", + " Mean performance on test set: 35.167588\n", + "With standard deviation: 4.497227\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 822.53it/s]\n", + "\n", + "\n", + " height rmse_test std_test rmse_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 35.192 4.49577 28.3604 1.35718 13.5041\n", + " 1 35.1808 4.50045 27.9335 1.44836 26.8292\n", + " 2 35.1632 4.50205 28.1113 1.50891 40.2356\n", + " 3 35.1946 4.49801 28.3903 1.36571 54.6704\n", + " 4 35.1753 4.50111 27.9746 1.46222 67.1522\n", + " 5 35.1997 4.5071 28.0184 1.45564 80.0881\n", + " 6 35.1645 4.49849 28.3731 1.60057 92.1925\n", + " 7 35.1771 4.5009 27.9604 1.45742 105.812\n", + " 8 35.1968 4.50526 28.1991 1.5149 119.022\n", + " 9 35.1956 4.50197 28.2665 1.30769 131.228\n", + " 10 35.1676 4.49723 28.4163 1.61596 144.964\n" + ] + } + ], + "source": [ + "# WL sp kernel\n", + "%load_ext line_profiler\n", + "\n", + "import numpy as np\n", + "import sys\n", + "sys.path.insert(0, \"../\")\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", + "\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", + "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'sp')\n", + "\n", + "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", + "\n", + "# %lprun -f _wl_subtreekernel_do \\\n", + "# kernel_train_test(datafile, kernel_file_path, 
weisfeilerlehmankernel, kernel_para, \\\n", + "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The line_profiler extension is already loaded. To reload it, use:\n", + " %reload_ext line_profiler\n", + "\n", " --- This is a regression problem ---\n", "\n", - " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.8712213039398193 seconds ---\n", - "[[ 35. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 56. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 35. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 275. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 213. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 235.]]\n", + " #--- calculating kernel matrix when height = 0.0 ---#\n", "\n", - " Saving kernel matrix to file...\n", + " Loading dataset from file...\n", "\n", - " Mean performance on train set: 0.691515\n", - "With standard deviation: 0.564620\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " Mean performance on test set: 14.918434\n", - "With standard deviation: 3.805352\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 0.8530018329620361 seconds ---\n", + "[[ 2. 1. 2. ... 0. 0. 1.]\n", + " [ 1. 3. 1. ... 0. 0. 2.]\n", + " [ 2. 1. 2. ... 0. 0. 1.]\n", + " ...\n", + " [ 0. 0. 0. ... 10. 7. 0.]\n", + " [ 0. 0. 0. ... 7. 10. 1.]\n", + " [ 1. 2. 1. ... 0. 1. 
10.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▍| 947/1000 [00:01<00:00, 719.29it/s]\n", + " Mean performance on train set: 29.997498\n", + "With standard deviation: 0.902340\n", "\n", + " Mean performance on test set: 33.407740\n", + "With standard deviation: 4.732717\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 653.54it/s]\n", "\n", - " #--- calculating kernel matrix when subtree height = 7 ---#\n", + "\n", + " #--- calculating kernel matrix when height = 1.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 1.717505931854248 seconds ---\n", + "[[ 4. 2. 4. ... 0. 0. 2.]\n", + " [ 2. 6. 2. ... 0. 0. 4.]\n", + " [ 4. 2. 4. ... 0. 0. 2.]\n", + " ...\n", + " [ 0. 0. 0. ... 20. 14. 0.]\n", + " [ 0. 0. 0. ... 14. 20. 2.]\n", + " [ 2. 4. 2. ... 0. 2. 20.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 96%|█████████▌| 956/1000 [00:01<00:00, 721.27it/s]\n", + " Mean performance on train set: 30.160338\n", + "With standard deviation: 1.094235\n", + "\n", + " Mean performance on test set: 33.423458\n", + "With standard deviation: 4.721311\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 723.53it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 2.0 ---#\n", + "\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.554422378540039 seconds ---\n", - "[[ 40. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 64. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 40. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 304. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 228. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 
246.]]\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 2.6603214740753174 seconds ---\n", + "[[ 6. 3. 6. ... 0. 0. 3.]\n", + " [ 3. 9. 3. ... 0. 0. 6.]\n", + " [ 6. 3. 6. ... 0. 0. 3.]\n", + " ...\n", + " [ 0. 0. 0. ... 30. 21. 0.]\n", + " [ 0. 0. 0. ... 21. 30. 3.]\n", + " [ 3. 6. 3. ... 0. 3. 30.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▍| 944/1000 [00:01<00:00, 650.98it/s]\n", + " Mean performance on train set: 29.928570\n", + "With standard deviation: 0.787941\n", "\n", - " Mean performance on train set: 0.691516\n", - "With standard deviation: 0.564620\n", + " Mean performance on test set: 33.433014\n", + "With standard deviation: 4.724408\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 688.71it/s]\n", "\n", - " Mean performance on test set: 15.629476\n", - "With standard deviation: 3.865387\n", "\n", + " #--- calculating kernel matrix when height = 3.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 3.477631092071533 seconds ---\n", + "[[ 8. 4. 8. ... 0. 0. 4.]\n", + " [ 4. 12. 4. ... 0. 0. 8.]\n", + " [ 8. 4. 8. ... 0. 0. 4.]\n", + " ...\n", + " [ 0. 0. 0. ... 40. 28. 0.]\n", + " [ 0. 0. 0. ... 28. 40. 4.]\n", + " [ 4. 8. 4. ... 0. 4. 
40.]]\n", "\n", - " #--- calculating kernel matrix when subtree height = 8 ---#\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 725.15it/s]\n", + " Mean performance on train set: 30.011409\n", + "With standard deviation: 0.909674\n", + "\n", + " Mean performance on test set: 33.407319\n", + "With standard deviation: 4.732434\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 720.71it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 4.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 4.5436692237854 seconds ---\n", + "[[10. 5. 10. ... 0. 0. 5.]\n", + " [ 5. 15. 5. ... 0. 0. 10.]\n", + " [10. 5. 10. ... 0. 0. 5.]\n", + " ...\n", + " [ 0. 0. 0. ... 50. 35. 0.]\n", + " [ 0. 0. 0. ... 35. 50. 5.]\n", + " [ 5. 10. 5. ... 0. 5. 50.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 568.04it/s]\n", + " Mean performance on train set: 30.184162\n", + "With standard deviation: 1.108902\n", + "\n", + " Mean performance on test set: 33.425625\n", + "With standard deviation: 4.721660\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 564.24it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 5.0 ---#\n", + "\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.8757314682006836 seconds ---\n", - "[[ 45. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 72. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 45. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 333. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 243. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 
257.]]\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 5.6617820262908936 seconds ---\n", + "[[12. 6. 12. ... 0. 0. 6.]\n", + " [ 6. 18. 6. ... 0. 0. 12.]\n", + " [12. 6. 12. ... 0. 0. 6.]\n", + " ...\n", + " [ 0. 0. 0. ... 60. 42. 0.]\n", + " [ 0. 0. 0. ... 42. 60. 6.]\n", + " [ 6. 12. 6. ... 0. 6. 60.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 99%|█████████▉| 993/1000 [00:01<00:00, 519.25it/s]\n", + " Mean performance on train set: 30.041068\n", + "With standard deviation: 1.018451\n", "\n", - " Mean performance on train set: 0.691515\n", - "With standard deviation: 0.564620\n", + " Mean performance on test set: 33.406717\n", + "With standard deviation: 4.726409\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 548.91it/s]\n", "\n", - " Mean performance on test set: 16.214369\n", - "With standard deviation: 3.928756\n", "\n", + " #--- calculating kernel matrix when height = 6.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 6.148027420043945 seconds ---\n", + "[[14. 7. 14. ... 0. 0. 7.]\n", + " [ 7. 21. 7. ... 0. 0. 14.]\n", + " [14. 7. 14. ... 0. 0. 7.]\n", + " ...\n", + " [ 0. 0. 0. ... 70. 49. 0.]\n", + " [ 0. 0. 0. ... 49. 70. 7.]\n", + " [ 7. 14. 7. ... 0. 7. 
70.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 498.31it/s]\n", + " Mean performance on train set: 29.905596\n", + "With standard deviation: 0.782179\n", "\n", - " #--- calculating kernel matrix when subtree height = 9 ---#\n", + " Mean performance on test set: 33.418992\n", + "With standard deviation: 4.730753\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 534.86it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 7.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 7.603543519973755 seconds ---\n", + "[[16. 8. 16. ... 0. 0. 8.]\n", + " [ 8. 24. 8. ... 0. 0. 16.]\n", + " [16. 8. 16. ... 0. 0. 8.]\n", + " ...\n", + " [ 0. 0. 0. ... 80. 56. 0.]\n", + " [ 0. 0. 0. ... 56. 80. 8.]\n", + " [ 8. 16. 8. ... 0. 8. 80.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 95%|█████████▌| 953/1000 [00:01<00:00, 586.15it/s]\n", + " Mean performance on train set: 30.175921\n", + "With standard deviation: 1.103820\n", + "\n", + " Mean performance on test set: 33.424820\n", + "With standard deviation: 4.721550\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 546.00it/s]\n", + "\n", + "\n", + " #--- calculating kernel matrix when height = 8.0 ---#\n", + "\n", + " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.205373764038086 seconds ---\n", - "[[ 50. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 80. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 50. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 362. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 258. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 
268.]]\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 7.972221612930298 seconds ---\n", + "[[18. 9. 18. ... 0. 0. 9.]\n", + " [ 9. 27. 9. ... 0. 0. 18.]\n", + " [18. 9. 18. ... 0. 0. 9.]\n", + " ...\n", + " [ 0. 0. 0. ... 90. 63. 0.]\n", + " [ 0. 0. 0. ... 63. 90. 9.]\n", + " [ 9. 18. 9. ... 0. 9. 90.]]\n", "\n", - " Saving kernel matrix to file...\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 490.30it/s]\n", + " Mean performance on train set: 30.136537\n", + "With standard deviation: 1.074854\n", "\n", - " Mean performance on train set: 0.691515\n", - "With standard deviation: 0.564620\n", + " Mean performance on test set: 33.412196\n", + "With standard deviation: 4.715539\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 536.66it/s]\n", "\n", - " Mean performance on test set: 16.725744\n", - "With standard deviation: 3.993095\n", "\n", + " #--- calculating kernel matrix when height = 9.0 ---#\n", + "\n", + " Loading dataset from file...\n", + "\n", + " Calculating kernel matrix, this could take a while...\n", + "\n", + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 9.070842504501343 seconds ---\n", + "[[ 20. 10. 20. ... 0. 0. 10.]\n", + " [ 10. 30. 10. ... 0. 0. 20.]\n", + " [ 20. 10. 20. ... 0. 0. 10.]\n", + " ...\n", + " [ 0. 0. 0. ... 100. 70. 0.]\n", + " [ 0. 0. 0. ... 70. 100. 10.]\n", + " [ 10. 20. 10. ... 0. 10. 
100.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n", + "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 527.13it/s]\n", + " Mean performance on train set: 30.032887\n", + "With standard deviation: 0.921065\n", + "\n", + " Mean performance on test set: 33.407050\n", + "With standard deviation: 4.731928\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 600.62it/s]\n", "\n", - " #--- calculating kernel matrix when subtree height = 10 ---#\n", + "\n", + " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", - " --- This is a regression problem ---\n", + " Calculating kernel matrix, this could take a while...\n", "\n", - " Calculating kernel matrix, this could take a while...\n" + " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 10.02536916732788 seconds ---\n", + "[[ 22. 11. 22. ... 0. 0. 11.]\n", + " [ 11. 33. 11. ... 0. 0. 22.]\n", + " [ 22. 11. 22. ... 0. 0. 11.]\n", + " ...\n", + " [ 0. 0. 0. ... 110. 77. 0.]\n", + " [ 0. 0. 0. ... 77. 110. 11.]\n", + " [ 11. 22. 11. ... 0. 11. 110.]]\n", + "\n", + " Starting calculate accuracy/rmse...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", - " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.737298250198364 seconds ---\n", - "[[ 55. 10. 4. ..., 20. 20. 20.]\n", - " [ 10. 88. 4. ..., 20. 20. 20.]\n", - " [ 4. 4. 55. ..., 22. 22. 26.]\n", - " ..., \n", - " [ 20. 20. 22. ..., 391. 159. 124.]\n", - " [ 20. 20. 22. ..., 159. 273. 124.]\n", - " [ 20. 20. 26. ..., 124. 124. 
279.]]\n", - "\n", - " Saving kernel matrix to file...\n", - "\n", - " Mean performance on train set: 0.691516\n", - "With standard deviation: 0.564621\n", - "\n", - " Mean performance on test set: 17.186401\n", - "With standard deviation: 4.056724\n", - "\n", - "\n", - " height RMSE_test std_test RMSE_train std_train k_time\n", - "-------- ----------- ---------- ------------ ----------- --------\n", - " 0 15.6859 4.1392 17.6816 0.713183 0.392071\n", - " 1 7.55046 2.33179 6.27001 0.654734 0.85789\n", - " 2 9.72847 2.05767 4.45068 0.882129 1.26405\n", - " 3 11.2961 2.79994 2.27059 0.481516 1.73124\n", - " 4 12.8083 3.44694 1.07403 0.637823 2.11128\n", - " 5 14.0179 3.67504 0.700602 0.57264 2.47513\n", - " 6 14.9184 3.80535 0.691515 0.56462 2.87122\n", - " 7 15.6295 3.86539 0.691516 0.56462 3.55442\n", - " 8 16.2144 3.92876 0.691515 0.56462 3.87573\n", - " 9 16.7257 3.9931 0.691515 0.56462 4.20537\n", - " 10 17.1864 4.05672 0.691516 0.564621 4.7373\n" + "calculate performance: 97%|█████████▋| 970/1000 [00:01<00:00, 694.38it/s]\n", + " Mean performance on train set: 29.924232\n", + "With standard deviation: 0.790843\n", + "\n", + " Mean performance on test set: 33.416469\n", + "With standard deviation: 4.731694\n", + "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 678.72it/s]\n", + "\n", + "\n", + " height rmse_test std_test rmse_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- ---------\n", + " 0 33.4077 4.73272 29.9975 0.90234 0.853002\n", + " 1 33.4235 4.72131 30.1603 1.09423 1.71751\n", + " 2 33.433 4.72441 29.9286 0.787941 2.66032\n", + " 3 33.4073 4.73243 30.0114 0.909674 3.47763\n", + " 4 33.4256 4.72166 30.1842 1.1089 4.54367\n", + " 5 33.4067 4.72641 30.0411 1.01845 5.66178\n", + " 6 33.419 4.73075 29.9056 0.782179 6.14803\n", + " 7 33.4248 4.72155 30.1759 1.10382 7.60354\n", + " 8 33.4122 4.71554 30.1365 1.07485 7.97222\n", + " 9 33.4071 4.73193 30.0329 0.921065 9.07084\n", + " 10 33.4165 4.73169 29.9242 
0.790843 10.0254\n" ] } ], "source": [ - "# test of WL subtree kernel\n", - "\n", - "\"\"\"\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. \n", - "\n", - "@references\n", - " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\"\"\"\n", - "\n", - "print(__doc__)\n", + "# WL edge kernel\n", + "%load_ext line_profiler\n", "\n", + "import numpy as np\n", "import sys\n", - "import os\n", - "import pathlib\n", - "from collections import OrderedDict\n", "sys.path.insert(0, \"../\")\n", - "from tabulate import tabulate\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", - "from pygraph.utils.graphfiles import loadDataset\n", - "from pygraph.utils.utils import split_train_test\n", - "\n", - "train_means_list = []\n", - "train_stds_list = []\n", - "test_means_list = []\n", - "test_stds_list = []\n", - "kernel_time_list = []\n", - "\n", - "for height in np.linspace(0, 10, 11):\n", - " print('\\n\\n #--- calculating kernel matrix when subtree height = %d ---#' % height)\n", - "\n", - " print('\\n Loading dataset from file...')\n", - " dataset, y = 
loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", - " y = np.array(y)\n", - "# print(y)\n", - "\n", - " # setup the parameters\n", - " model_type = 'regression' # Regression or classification problem\n", - " print('\\n --- This is a %s problem ---' % model_type)\n", + "from pygraph.utils.utils import kernel_train_test\n", + "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", "\n", - "# datasize = len(dataset)\n", - " trials = 100 # Trials for hyperparameters random search\n", - " splits = 10 # Number of splits of the data\n", - " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", - " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", + "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", + "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", "\n", + "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'edge')\n", "\n", - " # set the output path\n", - " kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", - " if not os.path.exists(kernel_file_path):\n", - " os.makedirs(kernel_file_path)\n", + "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", "\n", - " \"\"\"\n", - " - Here starts the main program\n", - " - First we permute the data, then for each split we evaluate corresponding performances\n", - " - In the end, the performances are averaged over the test sets\n", - " \"\"\"\n", + "# %lprun -f _wl_subtreekernel_do \\\n", + "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", + "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "# results\n", "\n", - " # save kernel matrices to files / read kernel matrices from files\n", - " kernel_file = kernel_file_path + 'km.ds'\n", - " path = pathlib.Path(kernel_file)\n", - " # get train set kernel matrix\n", - " if path.is_file():\n", - " print('\\n Loading the kernel matrix from file...')\n", - " Kmatrix = np.loadtxt(kernel_file)# results\n", - " print(Kmatrix)\n", - " else:\n", - " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix, run_time = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height))\n", - " kernel_time_list.append(run_time)\n", - " print(Kmatrix)\n", - " print('\\n Saving kernel matrix to file...')\n", - " # np.savetxt(kernel_file, Kmatrix)\n", + "# subtree with y normalization\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 36.2108 7.33179 38.6059 1.57064 0.379475\n", + " 1 9.00098 6.37145 6.76379 1.96568 0.844898\n", + " 2 19.8113 4.04911 5.28757 1.81899 1.35308\n", + " 3 25.0455 4.94276 2.3274 0.805733 1.81136\n", + " 4 28.2255 6.5212 0.85156 0.423465 2.23098\n", + " 5 30.6354 6.73647 3.35947 8.17561 2.71575\n", + " 6 32.1027 6.85601 3.54105 8.71922 3.11459\n", + " 7 32.9709 6.89606 6.94372 9.94045 3.55571\n", + " 8 33.5112 6.90753 6.97339 9.76975 3.79657\n", + " 9 33.8502 6.91427 11.8345 11.6213 4.41555\n", + " 10 34.0963 6.93115 11.4257 11.2624 4.94888\n", "\n", - " train_mean, train_std, test_mean, test_std = \\\n", - " split_train_test(Kmatrix, y, alpha_grid, C_grid, splits, trials, model_type, normalize = False)\n", - " \n", - " train_means_list.append(train_mean)\n", - " train_stds_list.append(train_std)\n", - " test_means_list.append(test_mean)\n", - " test_stds_list.append(test_std)\n", + "# subtree without y normalization\n", + " height RMSE_test std_test RMSE_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- 
--------\n", + " 0 15.6859 4.1392 17.6816 0.713183 0.360443\n", + " 1 7.55046 2.33179 6.27001 0.654734 0.837389\n", + " 2 9.72847 2.05767 4.45068 0.882129 1.25317\n", + " 3 11.2961 2.79994 2.27059 0.481516 1.79971\n", + " 4 12.8083 3.44694 1.07403 0.637823 2.35346\n", + " 5 14.0179 3.67504 0.700602 0.57264 2.78285\n", + " 6 14.9184 3.80535 0.691515 0.56462 3.20764\n", + " 7 15.6295 3.86539 0.691516 0.56462 3.71648\n", + " 8 16.2144 3.92876 0.691515 0.56462 3.99213\n", + " 9 16.7257 3.9931 0.691515 0.56462 4.26315\n", + " 10 17.1864 4.05672 0.691516 0.564621 5.00918\n", " \n", - "print('\\n') \n", - "table_dict = {'height': np.linspace(0, 10, 11), 'RMSE_test': test_means_list, 'std_test': test_stds_list, \\\n", - " 'RMSE_train': train_means_list, 'std_train': train_stds_list, 'k_time': kernel_time_list}\n", - "keyorder = ['height', 'RMSE_test', 'std_test', 'RMSE_train', 'std_train', 'k_time']\n", - "print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys'))" + "# sp\n", + " height rmse_test std_test rmse_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- --------\n", + " 0 35.192 4.49577 28.3604 1.35718 13.5041\n", + " 1 35.1808 4.50045 27.9335 1.44836 26.8292\n", + " 2 35.1632 4.50205 28.1113 1.50891 40.2356\n", + " 3 35.1946 4.49801 28.3903 1.36571 54.6704\n", + " 4 35.1753 4.50111 27.9746 1.46222 67.1522\n", + " 5 35.1997 4.5071 28.0184 1.45564 80.0881\n", + " 6 35.1645 4.49849 28.3731 1.60057 92.1925\n", + " 7 35.1771 4.5009 27.9604 1.45742 105.812\n", + " 8 35.1968 4.50526 28.1991 1.5149 119.022\n", + " 9 35.1956 4.50197 28.2665 1.30769 131.228\n", + " 10 35.1676 4.49723 28.4163 1.61596 144.964\n", + " \n", + "# path\n", + " height rmse_test std_test rmse_train std_train k_time\n", + "-------- ----------- ---------- ------------ ----------- ---------\n", + " 0 33.4077 4.73272 29.9975 0.90234 0.853002\n", + " 1 33.4235 4.72131 30.1603 1.09423 1.71751\n", + " 2 33.433 4.72441 
29.9286 0.787941 2.66032\n", + " 3 33.4073 4.73243 30.0114 0.909674 3.47763\n", + " 4 33.4256 4.72166 30.1842 1.1089 4.54367\n", + " 5 33.4067 4.72641 30.0411 1.01845 5.66178\n", + " 6 33.419 4.73075 29.9056 0.782179 6.14803\n", + " 7 33.4248 4.72155 30.1759 1.10382 7.60354\n", + " 8 33.4122 4.71554 30.1365 1.07485 7.97222\n", + " 9 33.4071 4.73193 30.0329 0.921065 9.07084\n", + " 10 33.4165 4.73169 29.9242 0.790843 10.0254" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": { "scrolled": true }, @@ -800,117 +1082,25 @@ "output_type": "stream", "text": [ "{'O', 'C'}\n", - "{'O', 'C'}\n", - "--- shortest path kernel built in 0.0002582073211669922 seconds ---\n", - "3\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'O'})]\n", - " -> \n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CO'}), (3, {'label': 'CCCO'}), (4, {'label': 'OCC'})]\n", - " -> \n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '3'}), (3, {'label': '1'}), (4, {'label': '2'})]\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'C'}), (5, {'label': 'C'}), 
(6, {'label': 'O'})]\n", - " -> \n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CC'}), (3, {'label': 'CO'}), (4, {'label': 'CCCC'}), (5, {'label': 'CCCO'}), (6, {'label': 'OCC'})]\n", - " -> \n" + "{'O', 'C'}\n" ] }, { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '0'}), (3, {'label': '3'}), (4, {'label': '4'}), (5, {'label': '1'}), (6, {'label': '2'})]\n", - "--- shortest path kernel built in 0.00026607513427734375 seconds ---\n", - "6\n" + "ename": "TypeError", + "evalue": "'int' object is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabelset1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabelset2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(edge_weight, *args)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mGn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mG\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m]\u001b[0m \u001b[0;31m# get shortest path graphs of Gn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mGn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m 
\u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mG\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m]\u001b[0m \u001b[0;31m# get shortest path graphs of Gn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/utils.py\u001b[0m in \u001b[0;36mgetSPGraph\u001b[0;34m(G, edge_weight)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0mBorgwardt\u001b[0m \u001b[0mKM\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKriegel\u001b[0m \u001b[0mHP\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mShortest\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpath\u001b[0m \u001b[0mkernels\u001b[0m \u001b[0mon\u001b[0m \u001b[0mgraphs\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mInData\u001b[0m \u001b[0mMining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFifth\u001b[0m \u001b[0mIEEE\u001b[0m \u001b[0mInternational\u001b[0m \u001b[0mConference\u001b[0m \u001b[0mon\u001b[0m \u001b[0;36m2005\u001b[0m \u001b[0mNov\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mIEEE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m 
\"\"\"\n\u001b[0;32m---> 37\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfloydTransformation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfloydTransformation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'bond_type'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/utils.py\u001b[0m in \u001b[0;36mfloydTransformation\u001b[0;34m(G, edge_weight)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0mBorgwardt\u001b[0m \u001b[0mKM\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKriegel\u001b[0m \u001b[0mHP\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mShortest\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpath\u001b[0m \u001b[0mkernels\u001b[0m \u001b[0mon\u001b[0m \u001b[0mgraphs\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mInData\u001b[0m \u001b[0mMining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFifth\u001b[0m \u001b[0mIEEE\u001b[0m \u001b[0mInternational\u001b[0m \u001b[0mConference\u001b[0m \u001b[0mon\u001b[0m \u001b[0;36m2005\u001b[0m \u001b[0mNov\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mIEEE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \"\"\"\n\u001b[0;32m---> 58\u001b[0;31m \u001b[0mspMatrix\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloyd_warshall_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_nodes_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnodes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/algorithms/shortest_paths/dense.py\u001b[0m in \u001b[0;36mfloyd_warshall_numpy\u001b[0;34m(G, nodelist, weight)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;31m# nonedges are not given the value 0 as well.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m A = nx.to_numpy_matrix(G, nodelist=nodelist, multigraph_weight=min,\n\u001b[0;32m---> 54\u001b[0;31m weight=weight, nonedge=np.inf)\n\u001b[0m\u001b[1;32m 55\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0mI\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0midentity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mto_numpy_matrix\u001b[0;34m(G, nodelist, dtype, order, multigraph_weight, weight, 
nonedge)\u001b[0m\n\u001b[1;32m 446\u001b[0m A = to_numpy_array(G, nodelist=nodelist, dtype=dtype, order=order,\n\u001b[1;32m 447\u001b[0m \u001b[0mmultigraph_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmultigraph_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 448\u001b[0;31m nonedge=nonedge)\n\u001b[0m\u001b[1;32m 449\u001b[0m \u001b[0mM\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mM\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mto_numpy_array\u001b[0;34m(G, nodelist, dtype, order, multigraph_weight, weight, nonedge)\u001b[0m\n\u001b[1;32m 1061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1062\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnodelist\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1063\u001b[0;31m \u001b[0mnodelist\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1064\u001b[0m \u001b[0mnodeset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodelist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1065\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodelist\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m 
\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodeset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: 'int' object is not iterable" ] } ], @@ -1539,411 +1729,6 @@ "weisfeilerlehmankernel(G1, G2, height = 2)\n", "# Kmatrix = weisfeilerlehmankernel(G1, G2)" ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "185" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "len(dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. \n", - "\n", - "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\n", - "\n", - " --- calculating kernel matrix when subtree height = 0 ---\n", - "\n", - " Loading dataset from file...\n", - "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", - " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", - " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", - " 55.2 88. 83. 
104.5 102. 92. 107.4 123.2 112.5 118.5\n", - " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", - " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", - " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", - " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", - " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", - " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", - " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", - " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", - " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", - " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", - " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", - " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", - " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", - " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", - "\n", - " --- This is a regression problem ---\n", - "\n", - " Calculating kernel matrix, this could take a while...\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'sp'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, *args)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_weisfeilerlehmankernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36m_weisfeilerlehmankernel_do\u001b[0;34m(G1, G2, height)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# calculate kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# change your base kernel here (and one more before)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;31m# get label sets of both graphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spkernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mfor\u001b[0m 
\u001b[0me2\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "# Author: Elisabetta Ghisu\n", - "# test of WL subtree kernel\n", - "\n", - "\"\"\"\n", - "- This script take as input a kernel matrix\n", - "and returns the classification or regression performance\n", - "- The kernel matrix can be calculated using any of the graph kernels approaches\n", - "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", - "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", - "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", - "provide the corresponding performance on the test set. If more than one split is performed, the final results \n", - "correspond to the average of the performances on the test sets. 
\n", - "\n", - "@references\n", - " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", - "\"\"\"\n", - "\n", - "print(__doc__)\n", - "\n", - "import sys\n", - "import os\n", - "import pathlib\n", - "sys.path.insert(0, \"../\")\n", - "from tabulate import tabulate\n", - "\n", - "import random\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from sklearn.kernel_ridge import KernelRidge # 0.17\n", - "from sklearn.metrics import accuracy_score, mean_squared_error\n", - "from sklearn import svm\n", - "\n", - "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", - "from pygraph.utils.graphfiles import loadDataset\n", - "\n", - "val_means_height = []\n", - "val_stds_height = []\n", - "test_means_height = []\n", - "test_stds_height = []\n", - "\n", - "\n", - "for height in np.linspace(0, 10, 11):\n", - " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n", - "\n", - " print('\\n Loading dataset from file...')\n", - " dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", - " y = np.array(y)\n", - " print(y)\n", - "\n", - " # setup the parameters\n", - " model_type = 'regression' # Regression or classification problem\n", - " print('\\n --- This is a %s problem ---' % model_type)\n", - "\n", - " datasize = len(dataset)\n", - " trials = 100 # Trials for hyperparameters random search\n", - " splits = 10 # Number of splits of the data\n", - " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", - " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", - " random.seed(20) # Set the seed for uniform parameter distribution\n", - "\n", - " # set the output path\n", - " kernel_file_path = 'kernelmatrices_weisfeilerlehman_acyclic/'\n", - " if not os.path.exists(kernel_file_path):\n", - " 
os.makedirs(kernel_file_path)\n", - "\n", - "\n", - " \"\"\"\n", - " - Here starts the main program\n", - " - First we permute the data, then for each split we evaluate corresponding performances\n", - " - In the end, the performances are averaged over the test sets\n", - " \"\"\"\n", - "\n", - " # save kernel matrices to files / read kernel matrices from files\n", - " kernel_file = kernel_file_path + 'km.ds'\n", - " path = pathlib.Path(kernel_file)\n", - " # get train set kernel matrix\n", - " if path.is_file():\n", - " print('\\n Loading the kernel matrix from file...')\n", - " Kmatrix = np.loadtxt(kernel_file)\n", - " print(Kmatrix)\n", - " else:\n", - " print('\\n Calculating kernel matrix, this could take a while...')\n", - " Kmatrix = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height), base_kernel = 'sp')\n", - " print(Kmatrix)\n", - " print('\\n Saving kernel matrix to file...')\n", - "# np.savetxt(kernel_file, Kmatrix)\n", - "\n", - " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", - " val_split = []\n", - " test_split = []\n", - "\n", - " # For each split of the data\n", - " for j in range(10, 10 + splits):\n", - " # print('\\n Starting split %d...' 
% j)\n", - "\n", - " # Set the random set for data permutation\n", - " random_state = int(j)\n", - " np.random.seed(random_state)\n", - " idx_perm = np.random.permutation(datasize)\n", - " # print(idx_perm)\n", - "\n", - " # Permute the data\n", - " y_perm = y[idx_perm] # targets permutation\n", - " # print(y_perm)\n", - " Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", - " # print(Kmatrix_perm)\n", - " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", - "\n", - " # Set the training, validation and test\n", - " # Note: the percentage can be set up by the user\n", - " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", - " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", - " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", - " num_val = num_train_val - num_train # 10% (of train + val) for validation\n", - "\n", - " # Split the kernel matrix\n", - " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", - " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", - " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", - "\n", - " # Split the targets\n", - " y_train = y_perm[0:num_train]\n", - "\n", - " # Normalization step (for real valued targets only)\n", - " if model_type == 'regression':\n", - " # print('\\n Normalizing output y...')\n", - " y_train_mean = np.mean(y_train)\n", - " y_train_std = np.std(y_train)\n", - " y_train = (y_train - y_train_mean) / float(y_train_std)\n", - " # print(y)\n", - "\n", - " y_val = y_perm[num_train:(num_train + num_val)]\n", - " y_test = y_perm[(num_train + num_val):datasize]\n", - "\n", - " # Record the performance for each parameter trial respectively on validation and test set\n", - " perf_all_train = []\n", - " perf_all_test = []\n", - "\n", - " # For each parameter trial\n", - " for i in range(trials):\n", - " # For regression use the 
Kernel Ridge method\n", - " if model_type == 'regression':\n", - " # print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", - "\n", - " # Fit the kernel ridge model\n", - " KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", - " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", - " KR.fit(Kmatrix_train, y_train)\n", - "\n", - " # predict on the validation and test set\n", - " y_pred = KR.predict(Kmatrix_val)\n", - " y_pred_test = KR.predict(Kmatrix_test)\n", - " # print(y_pred)\n", - "\n", - " # adjust prediction: needed because the training targets have been normalizaed\n", - " y_pred = y_pred * float(y_train_std) + y_train_mean\n", - " # print(y_pred)\n", - " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", - " # print(y_pred_test)\n", - "\n", - " # root mean squared error on validation\n", - " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", - " perf_all_val.append(rmse)\n", - "\n", - " # root mean squared error in test \n", - " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", - " perf_all_test.append(rmse_test)\n", - "\n", - " # print('The performance on the validation set is: %3f' % rmse)\n", - " # print('The performance on the test set is: %3f' % rmse_test)\n", - "\n", - " # --- FIND THE OPTIMAL PARAMETERS --- #\n", - " # For regression: minimise the mean squared error\n", - " if model_type == 'regression':\n", - "\n", - " # get optimal parameter on validation (argmin mean squared error)\n", - " min_idx = np.argmin(perf_all_test)\n", - " alpha_opt = alpha_grid[min_idx]\n", - "\n", - " # performance corresponding to optimal parameter on val\n", - " perf_val_opt = perf_all_val[min_idx]\n", - "\n", - " # corresponding performance on test for the same parameter\n", - " perf_test_opt = perf_all_test[min_idx]\n", - "\n", - " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", - " # print('The best performance 
on the validation set is: %3f' % perf_val_opt)\n", - " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", - "\n", - " # append the best performance on validation\n", - " # at the current split\n", - " val_split.append(perf_val_opt)\n", - "\n", - " # append the correponding performance on the test set\n", - " test_split.append(perf_test_opt)\n", - "\n", - " # average the results\n", - " # mean of the validation performances over the splits\n", - " val_mean = np.mean(np.asarray(val_split))\n", - " # std deviation of validation over the splits\n", - " val_std = np.std(np.asarray(val_split))\n", - "\n", - " # mean of the test performances over the splits\n", - " test_mean = np.mean(np.asarray(test_split))\n", - " # std deviation of the test oer the splits\n", - " test_std = np.std(np.asarray(test_split))\n", - "\n", - " print('\\n Mean performance on val set: %3f' % val_mean)\n", - " print('With standard deviation: %3f' % val_std)\n", - " print('\\n Mean performance on test set: %3f' % test_mean)\n", - " print('With standard deviation: %3f' % test_std)\n", - " \n", - " val_means_height.append(val_mean)\n", - " val_stds_height.append(val_std)\n", - " test_means_height.append(test_mean)\n", - " test_stds_height.append(test_std)\n", - " \n", - "print('\\n') \n", - "print(tabulate({'height': np.linspace(1, 12, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# a = [0, 1, 3, 2]\n", - "# b = [3, 2, 1, 0]\n", - "# print(1 if a == b else 0)\n", - "\n", - "# max(1 ,2)\n", - "\n", - "# x = [ 'r', 'a', 's' ]\n", - "# x.sort()\n", - "# print(x)\n", - "\n", - "# def test1(*args, base = 'subtree'):\n", - "# if base == 
'subtree':\n", - "# print('subtree')\n", - "# elif base == 'edge':\n", - "# print('edge')\n", - "# else:\n", - "# print('sp')\n", - "\n", - "# # function parameter usage test\n", - "# test1('hello', 'hi', base = 'edge')\n", - "\n", - "# # python matrix calculation speed test\n", - "# import numpy as np\n", - "# import time\n", - "\n", - "# size = 100\n", - "# m1 = np.random.random((size, size))\n", - "# m2 = np.random.random((size, size))\n", - "# itr = 1\n", - "\n", - "# start_time = time.time()\n", - "# for i in range(itr):\n", - "# np.dot(m1, m2)\n", - "# print(time.time() - start_time)\n", - "\n", - "# start_time = time.time()\n", - "# for j in range(itr):\n", - "# result = np.zeros((size, size))\n", - "# for i1 in range(size):\n", - "# for i2 in range(size):\n", - "# for i3 in range(size):\n", - "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n", - "# print(time.time() - start_time)\n", - "\n", - "# start_time = time.time()\n", - "# for i in range(itr):\n", - "# print(np.dot(m1, m2))\n", - "# print(time.time() - start_time)\n", - "\n", - "# start_time = time.time()\n", - "# for j in range(itr):\n", - "# result = np.zeros((size, size))\n", - "# for i1 in range(size):\n", - "# for i2 in range(size):\n", - "# for i3 in range(size):\n", - "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n", - "# print(result)\n", - "# print(time.time() - start_time)\n", - "\n", - "# help(np.sum)\n", - "\n", - "# test dict\n", - "import sys\n", - "from collections import Counter\n", - "import networkx as nx\n", - "sys.path.insert(0, \"../\")\n", - "from pygraph.utils.graphfiles import loadDataset\n", - "from pygraph.kernels.spkernel import spkernel\n", - "\n", - "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", - "G1 = dataset[15]\n", - "nx.get_node_attributes(G1, 'label')\n", - "listhqhq = list(nx.get_node_attributes(G1, 'label').values())\n", - "dicthaha = dict(Counter(listhqhq))\n", - "len(dicthaha)" - ] } ], "metadata": { diff --git 
a/pygraph/kernels/__pycache__/cyclicPatternKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/cyclicPatternKernel.cpython-35.pyc new file mode 100644 index 0000000..e589239 Binary files /dev/null and b/pygraph/kernels/__pycache__/cyclicPatternKernel.cpython-35.pyc differ diff --git a/pygraph/kernels/__pycache__/deltaKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/deltaKernel.cpython-35.pyc index edf374d..606b6b2 100644 Binary files a/pygraph/kernels/__pycache__/deltaKernel.cpython-35.pyc and b/pygraph/kernels/__pycache__/deltaKernel.cpython-35.pyc differ diff --git a/pygraph/kernels/__pycache__/marginalizedKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/marginalizedKernel.cpython-35.pyc index 240a5c5..0cb5c91 100644 Binary files a/pygraph/kernels/__pycache__/marginalizedKernel.cpython-35.pyc and b/pygraph/kernels/__pycache__/marginalizedKernel.cpython-35.pyc differ diff --git a/pygraph/kernels/__pycache__/pathKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/pathKernel.cpython-35.pyc index e6e50c8..2664542 100644 Binary files a/pygraph/kernels/__pycache__/pathKernel.cpython-35.pyc and b/pygraph/kernels/__pycache__/pathKernel.cpython-35.pyc differ diff --git a/pygraph/kernels/__pycache__/spKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/spKernel.cpython-35.pyc index af6d976..5315174 100644 Binary files a/pygraph/kernels/__pycache__/spKernel.cpython-35.pyc and b/pygraph/kernels/__pycache__/spKernel.cpython-35.pyc differ diff --git a/pygraph/kernels/__pycache__/treePatternKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/treePatternKernel.cpython-35.pyc new file mode 100644 index 0000000..199e6d5 Binary files /dev/null and b/pygraph/kernels/__pycache__/treePatternKernel.cpython-35.pyc differ diff --git a/pygraph/kernels/__pycache__/treeletKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/treeletKernel.cpython-35.pyc index 05209a4..aed6e66 100644 Binary files a/pygraph/kernels/__pycache__/treeletKernel.cpython-35.pyc and 
b/pygraph/kernels/__pycache__/treeletKernel.cpython-35.pyc differ diff --git a/pygraph/kernels/__pycache__/weisfeilerLehmanKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/weisfeilerLehmanKernel.cpython-35.pyc index 911f076..7b7f796 100644 Binary files a/pygraph/kernels/__pycache__/weisfeilerLehmanKernel.cpython-35.pyc and b/pygraph/kernels/__pycache__/weisfeilerLehmanKernel.cpython-35.pyc differ diff --git a/pygraph/kernels/cyclicPatternKernel.py b/pygraph/kernels/cyclicPatternKernel.py new file mode 100644 index 0000000..b4e0da0 --- /dev/null +++ b/pygraph/kernels/cyclicPatternKernel.py @@ -0,0 +1,147 @@ +""" +@author: linlin +@references: + [1] Tamás Horváth, Thomas Gärtner, and Stefan Wrobel. Cyclic pattern kernels for predictive graph mining. In Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining, pages 158–167. ACM, 2004. + [2] Hopcroft, J.; Tarjan, R. (1973). “Efficient algorithms for graph manipulation”. Communications of the ACM 16: 372–378. doi:10.1145/362248.362272. + [3] Finding all the elementary circuits of a directed graph. D. B. Johnson, SIAM Journal on Computing 4, no. 1, 77-84, 1975. http://dx.doi.org/10.1137/0204007 +""" + +import sys +import pathlib +sys.path.insert(0, "../") +import time + +import networkx as nx +import numpy as np + +from tqdm import tqdm + + +def cyclicpatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None): + """Calculate cyclic pattern graph kernels between graphs. + Parameters + ---------- + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + / + G1, G2 : NetworkX graphs + 2 graphs between which the kernel is calculated. + node_label : string + node attribute used as label. The default node label is atom. + edge_label : string + edge attribute used as label. The default edge label is bond_type. + labeled : boolean + Whether the graphs are labeled. The default is True. 
+ depth : integer + Depth of search. Longest length of paths. + + Return + ------ + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the path kernel up to d between 2 praphs. + """ + Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list + Kmatrix = np.zeros((len(Gn), len(Gn))) + + start_time = time.time() + + # get all cyclic and tree patterns of all graphs before calculating kernels to save time, but this may consume a lot of memory for large dataset. + all_patterns = [ get_patterns(Gn[i], node_label = node_label, edge_label = edge_label, labeled = labeled, cycle_bound = cycle_bound) + for i in tqdm(range(0, len(Gn)), desc = 'retrieve patterns', file=sys.stdout) ] + + for i in tqdm(range(0, len(Gn)), desc = 'calculate kernels', file=sys.stdout): + for j in range(i, len(Gn)): + Kmatrix[i][j] = _cyclicpatternkernel_do(all_patterns[i], all_patterns[j]) + Kmatrix[j][i] = Kmatrix[i][j] + + run_time = time.time() - start_time + print("\n --- kernel matrix of cyclic pattern kernel of size %d built in %s seconds ---" % (len(Gn), run_time)) + + return Kmatrix, run_time + + +def _cyclicpatternkernel_do(patterns1, patterns2): + """Calculate path graph kernels up to depth d between 2 graphs. + + Parameters + ---------- + paths1, paths2 : list + List of paths in 2 graphs, where for unlabeled graphs, each path is represented by a list of nodes; while for labeled graphs, each path is represented by a string consists of labels of nodes and edges on that path. + k_func : function + A kernel function used using different notions of fingerprint similarity. + node_label : string + node attribute used as label. The default node label is atom. + edge_label : string + edge attribute used as label. The default edge label is bond_type. + labeled : boolean + Whether the graphs are labeled. The default is True. + + Return + ------ + kernel : float + Treelet Kernel between 2 graphs. 
+ """ + return len(set(patterns1) & set(patterns2)) + + +def get_patterns(G, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None): + """Find all cyclic and tree patterns in a graph. + + Parameters + ---------- + G : NetworkX graphs + The graph in which paths are searched. + length : integer + The maximum length of paths. + node_label : string + node attribute used as label. The default node label is atom. + edge_label : string + edge attribute used as label. The default edge label is bond_type. + labeled : boolean + Whether the graphs are labeled. The default is True. + + Return + ------ + path : list + List of paths retrieved, where for unlabeled graphs, each path is represented by a list of nodes; while for labeled graphs, each path is represented by a string consists of labels of nodes and edges on that path. + """ + number_simplecycles = 0 + bridges = nx.Graph() + patterns = [] + + bicomponents = nx.biconnected_component_subgraphs(G) # all biconnected components of G. this function use algorithm in reference [2], which (i guess) is slightly different from the one used in paper [1] + for subgraph in bicomponents: + if nx.number_of_edges(subgraph) > 1: + simple_cycles = list(nx.simple_cycles(G.to_directed())) # all simple cycles in biconnected components. this function use algorithm in reference [3], which has time complexity O((n+e)(N+1)) for n nodes, e edges and N simple cycles. Which might be slower than the algorithm applied in paper [1] + if cycle_bound != None and len(simple_cycles) > cycle_bound - number_simplecycles: # in paper [1], when applying another algorithm (subroutine RT), this becomes len(simple_cycles) == cycle_bound - number_simplecycles + 1, check again. 
+ return [] + else: + + # calculate canonical representation for each simple cycle + all_canonkeys = [] + for cycle in simple_cycles: + canonlist = [ G.node[node][node_label] + G[node][cycle[cycle.index(node) + 1]][edge_label] for node in cycle[:-1] ] + canonkey = ''.join(canonlist) + canonkey = canonkey if canonkey < canonkey[::-1] else canonkey[::-1] + for i in range(1, len(cycle[:-1])): + canonlist = [ G.node[node][node_label] + G[node][cycle[cycle.index(node) + 1]][edge_label] for node in cycle[i:-1] + cycle[:i] ] + canonkey_t = ''.join(canonlist) + canonkey_t = canonkey_t if canonkey_t < canonkey_t[::-1] else canonkey_t[::-1] + canonkey = canonkey if canonkey < canonkey_t else canonkey_t + all_canonkeys.append(canonkey) + + patterns = list(set(patterns) | set(all_canonkeys)) + number_simplecycles += len(simple_cycles) + else: + bridges.add_edges_from(subgraph.edges(data=True)) + + # calculate canonical representation for each connected component in bridge set + components = list(nx.connected_component_subgraphs(bridges)) # all connected components in the bridge + tree_patterns = [] + for tree in components: + break + + + + # patterns += pi(bridges) + return patterns diff --git a/pygraph/kernels/deltaKernel.py b/pygraph/kernels/deltaKernel.py index fd35d8c..c579c93 100644 --- a/pygraph/kernels/deltaKernel.py +++ b/pygraph/kernels/deltaKernel.py @@ -1,18 +1,18 @@ def deltakernel(condition): """Return 1 if condition holds, 0 otherwise. - + Parameters ---------- condition : Boolean A condition, according to which the kernel is set to 1 or 0. - + Return ------ kernel : integer Delta kernel. - + References ---------- [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003. 
""" - return (1 if condition else 0) \ No newline at end of file + return condition #(1 if condition else 0) diff --git a/pygraph/kernels/pathKernel.py b/pygraph/kernels/pathKernel.py index cb2b244..e5dfa63 100644 --- a/pygraph/kernels/pathKernel.py +++ b/pygraph/kernels/pathKernel.py @@ -1,3 +1,8 @@ +""" +@author: linlin +@references: Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. InESANN 2007 Apr 25 (pp. 355-360). +""" + import sys import pathlib sys.path.insert(0, "../") @@ -27,10 +32,6 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'): ------ Kmatrix/kernel : Numpy matrix/float Kernel matrix, each element of which is the path kernel between 2 praphs. / Path kernel between 2 graphs. - - References - ---------- - [1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. InESANN 2007 Apr 25 (pp. 355-360). """ some_graph = args[0][0] if len(args) == 1 else args[0] # only edge attributes of type int or float can be used as edge weight to calculate the shortest paths. 
some_weight = list(nx.get_edge_attributes(some_graph, edge_label).values())[0] @@ -42,9 +43,11 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'): start_time = time.time() + splist = [ get_shortest_paths(Gn[i], weight) for i in range(0, len(Gn)) ] + for i in range(0, len(Gn)): for j in range(i, len(Gn)): - Kmatrix[i][j] = _pathkernel_do(Gn[i], Gn[j], node_label, edge_label, weight = weight) + Kmatrix[i][j] = _pathkernel_do(Gn[i], Gn[j], splist[i], splist[j], node_label, edge_label) Kmatrix[j][i] = Kmatrix[i][j] run_time = time.time() - start_time @@ -55,7 +58,10 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'): else: # for only 2 graphs start_time = time.time() - kernel = _pathkernel_do(args[0], args[1], node_label, edge_label, weight = weight) + splist = get_shortest_paths(args[0], weight) + splist = get_shortest_paths(args[1], weight) + + kernel = _pathkernel_do(args[0], args[1], sp1, sp2, node_label, edge_label) run_time = time.time() - start_time print("\n --- mean average path kernel built in %s seconds ---" % (run_time)) @@ -63,19 +69,19 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'): return kernel, run_time -def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', weight = None): +def _pathkernel_do(G1, G2, sp1, sp2, node_label = 'atom', edge_label = 'bond_type'): """Calculate mean average path kernel between 2 graphs. Parameters ---------- G1, G2 : NetworkX graphs 2 graphs between which the kernel is calculated. + sp1, sp2 : list of list + List of shortest paths of 2 graphs, where each path is represented by a list of nodes. node_label : string node attribute used as label. The default node label is atom. edge_label : string edge attribute used as label. The default edge label is bond_type. - weight : string/None - edge attribute used as weight to calculate the shortest path. The default edge label is None. 
Return ------ @@ -83,30 +89,62 @@ def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', weight Path Kernel between 2 graphs. """ # calculate shortest paths for both graphs - sp1 = [] - num_nodes = G1.number_of_nodes() - for node1 in range(num_nodes): - for node2 in range(node1 + 1, num_nodes): - sp1.append(nx.shortest_path(G1, node1, node2, weight = weight)) - - sp2 = [] - num_nodes = G2.number_of_nodes() - for node1 in range(num_nodes): - for node2 in range(node1 + 1, num_nodes): - sp2.append(nx.shortest_path(G2, node1, node2, weight = weight)) # calculate kernel kernel = 0 for path1 in sp1: for path2 in sp2: if len(path1) == len(path2): - kernel_path = deltakernel(G1.node[path1[0]][node_label] == G2.node[path2[0]][node_label]) + kernel_path = (G1.node[path1[0]][node_label] == G2.node[path2[0]][node_label]) if kernel_path: for i in range(1, len(path1)): # kernel = 1 if all corresponding nodes and edges in the 2 paths have same labels, otherwise 0 - kernel_path *= deltakernel(G1[path1[i - 1]][path1[i]][edge_label] == G2[path2[i - 1]][path2[i]][edge_label]) * deltakernel(G1.node[path1[i]][node_label] == G2.node[path2[i]][node_label]) + kernel_path *= (G1[path1[i - 1]][path1[i]][edge_label] == G2[path2[i - 1]][path2[i]][edge_label]) \ + * (G1.node[path1[i]][node_label] == G2.node[path2[i]][node_label]) + if kernel_path == 0: + break kernel += kernel_path # add up kernels of all paths + # kernel = 0 + # for path1 in sp1: + # for path2 in sp2: + # if len(path1) == len(path2): + # if (G1.node[path1[0]][node_label] == G2.node[path2[0]][node_label]): + # for i in range(1, len(path1)): + # # kernel = 1 if all corresponding nodes and edges in the 2 paths have same labels, otherwise 0 + # # kernel_path *= (G1[path1[i - 1]][path1[i]][edge_label] == G2[path2[i - 1]][path2[i]][edge_label]) \ + # # * (G1.node[path1[i]][node_label] == G2.node[path2[i]][node_label]) + # # if kernel_path == 0: + # # break + # # kernel += kernel_path # add up kernels of all paths 
+ # if (G1[path1[i - 1]][path1[i]][edge_label] != G2[path2[i - 1]][path2[i]][edge_label]) or \ + # (G1.node[path1[i]][node_label] != G2.node[path2[i]][node_label]): + # break + # else: + # kernel += 1 + kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average return kernel + +def get_shortest_paths(G, weight): + """Get all shortest paths of a graph. + + Parameters + ---------- + G : NetworkX graphs + The graphs whose paths are calculated. + weight : string/None + edge attribute used as weight to calculate the shortest path. + + Return + ------ + sp : list of list + List of shortest paths of the graph, where each path is represented by a list of nodes. + """ + sp = [] + num_nodes = G.number_of_nodes() + for node1 in range(num_nodes): + for node2 in range(node1 + 1, num_nodes): + sp.append(nx.shortest_path(G, node1, node2, weight = weight)) + return sp diff --git a/pygraph/kernels/results.md b/pygraph/kernels/results.md index b0b50e8..64e2fe3 100644 --- a/pygraph/kernels/results.md +++ b/pygraph/kernels/results.md @@ -1,20 +1,26 @@ # Results with minimal test RMSE for each kernel on dataset Asyclic -All kernels are tested on dataset Asyclic, which consists of 185 molecules (graphs). +All kernels expect for Cyclic pattern kernel are tested on dataset Asyclic, which consists of 185 molecules (graphs). (Cyclic pattern kernel is tested on dataset MAO and PAH.) The criteria used for prediction are SVM for classification and kernel Ridge regression for regression. For predition we randomly divide the data in train and test subset, where 90% of entire dataset is for training and rest for testing. 10 splits are performed. For each split, we first train on the train data, then evaluate the performance on the test set. We choose the optimal parameters for the test set and finally provide the corresponding performance. The final results correspond to the average of the performances on the test sets. 
+All the results were run under Python 3.5.2, in a machine of 64 bit with one Intel(R) Core(TM) i7-7920HQ CPU @ 3.10GHz, Memory of 32GB, and Ubuntu 16.04.3 LTS OS. + ## Summary -| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time | -|---------------|:-------:|:------:|-------------:|-------:| -| Shortest path | 35.19 | 4.50 | - | 14.58" | -| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" | -| Path | 14.00 | 6.94 | - | 37.58" | -| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" | -| Treelet | 8.31 | 3.38 | - | 0.50" | -| Path up to d | 7.43 | 2.69 | depth = 2 | 0.52" | +| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time | +|------------------|:-------:|:------:|------------------:|-------:| +| Shortest path | 35.19 | 4.50 | - | 14.58" | +| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" | +| Path | 14.00 | 6.94 | - | 37.58" | +| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" | +| WL shortest path | 35.16 | 4.50 | height = 2 | 40.24" | +| WL edge | 33.41 | 4.73 | height = 5 | 5.66" | +| Treelet | 8.31 | 3.38 | - | 0.50" | +| Path up to d | 7.43 | 2.69 | depth = 2 | 0.52" | +| Tree pattern | 7.27 | 2.21 | lamda = 1, h = 2 | 37.24" | +| Cyclic pattern | 0.9 | 0.11 | cycle bound = 100 | 0.31" | * RMSE stands for arithmetic mean of the root mean squared errors on all splits. * STD stands for standard deviation of the root mean squared errors on all splits. @@ -76,6 +82,42 @@ The table below shows the results of the WL subtree under different subtree heig 10 17.1864 4.05672 0.691516 0.564621 5.00918 ``` +### Weisfeiler-Lehman shortest path kernel +The table below shows the results of the WL subtree under different subtree heights. 
+``` + height rmse_test std_test rmse_train std_train k_time +-------- ----------- ---------- ------------ ----------- -------- + 0 35.192 4.49577 28.3604 1.35718 13.5041 + 1 35.1808 4.50045 27.9335 1.44836 26.8292 + 2 35.1632 4.50205 28.1113 1.50891 40.2356 + 3 35.1946 4.49801 28.3903 1.36571 54.6704 + 4 35.1753 4.50111 27.9746 1.46222 67.1522 + 5 35.1997 4.5071 28.0184 1.45564 80.0881 + 6 35.1645 4.49849 28.3731 1.60057 92.1925 + 7 35.1771 4.5009 27.9604 1.45742 105.812 + 8 35.1968 4.50526 28.1991 1.5149 119.022 + 9 35.1956 4.50197 28.2665 1.30769 131.228 + 10 35.1676 4.49723 28.4163 1.61596 144.964 +``` + +### Weisfeiler-Lehman edge kernel +The table below shows the results of the WL subtree under different subtree heights. +``` + height rmse_test std_test rmse_train std_train k_time +-------- ----------- ---------- ------------ ----------- --------- + 0 33.4077 4.73272 29.9975 0.90234 0.853002 + 1 33.4235 4.72131 30.1603 1.09423 1.71751 + 2 33.433 4.72441 29.9286 0.787941 2.66032 + 3 33.4073 4.73243 30.0114 0.909674 3.47763 + 4 33.4256 4.72166 30.1842 1.1089 4.54367 + 5 33.4067 4.72641 30.0411 1.01845 5.66178 + 6 33.419 4.73075 29.9056 0.782179 6.14803 + 7 33.4248 4.72155 30.1759 1.10382 7.60354 + 8 33.4122 4.71554 30.1365 1.07485 7.97222 + 9 33.4071 4.73193 30.0329 0.921065 9.07084 + 10 33.4165 4.73169 29.9242 0.790843 10.0254 +``` + ### Treelet kernel **The targets of training data are normalized before calculating the kernel.** ``` @@ -87,7 +129,7 @@ The table below shows the results of the WL subtree under different subtree heig ### Path kernel up to depth *d* The table below shows the results of the path kernel up to different depth *d*. -The first table is the results using Tanimoto kernel, where **The targets of training data are normalized before calculating the kernel.**. +The first table is the results using *Tanimoto kernel*, where **The targets of training data are normalized before calculating the kernel.**. 
``` depth rmse_test std_test rmse_train std_train k_time ------- ----------- ---------- ------------ ----------- --------- @@ -104,7 +146,7 @@ The first table is the results using Tanimoto kernel, where **The targets of tra 10 19.8708 5.09217 10.7787 2.10002 2.41006 ``` -The second table is the results using MinMax kernel. +The second table is the results using *MinMax kernel*. ``` depth rmse_test std_test rmse_train std_train k_time ------- ----------- ---------- ------------ ----------- -------- @@ -120,3 +162,62 @@ depth rmse_test std_test rmse_train std_train k_time 9 13.1789 5.27707 1.36002 1.84834 1.96545 10 13.2538 5.26425 1.36208 1.85426 2.24943 ``` + + +### Tree pattern kernel +Until N kernel when h = 2: +``` + lmda rmse_test std_test rmse_train std_train k_time +----------- ----------- ---------- ------------ ----------- -------- + 1e-10 7.46524 1.71862 5.99486 0.356634 38.1447 + 1e-09 7.37326 1.77195 5.96155 0.374395 37.4921 + 1e-08 7.35105 1.78349 5.96481 0.378047 37.9971 + 1e-07 7.35213 1.77903 5.96728 0.382251 38.3182 + 1e-06 7.3524 1.77992 5.9696 0.3863 39.6428 + 1e-05 7.34958 1.78141 5.97114 0.39017 37.3711 + 0.0001 7.3513 1.78136 5.94251 0.331843 37.3967 + 0.001 7.35822 1.78119 5.9326 0.32534 36.7357 + 0.01 7.37552 1.79037 5.94089 0.34763 36.8864 + 0.1 7.32951 1.91346 6.42634 1.29405 36.8382 + 1 7.27134 2.20774 6.62425 1.2242 37.2425 + 10 7.49787 2.36815 6.81697 1.50182 37.8286 + 100 7.42887 2.64789 6.68766 1.34809 36.3701 + 1000 7.24914 2.65554 6.81906 1.41008 36.1695 + 10000 7.08183 2.6248 6.93431 1.38441 37.5723 +100000 8.021 3.43694 8.69813 0.909839 37.8158 + 1e+06 8.49625 3.6332 9.59333 0.96626 38.4688 + 1e+07 10.9067 3.17593 11.5642 2.07792 36.9926 + 1e+08 61.1524 10.4355 65.3527 13.9538 37.1321 + 1e+09 99.943 13.6994 98.8848 5.27014 36.7443 + 1e+10 100.083 13.8503 97.9168 3.22768 37.096 +``` + +### Cyclic pattern kernel +**This kernel is not tested on dataset Acyclic** + +Results on dataset MAO: +``` +cycle_bound accur_test std_test 
accur_train std_train k_time +------------- ------------ ---------- ------------- ----------- -------- + 0 0.642857 0.146385 0.54918 0.0167983 0.187052 + 50 0.871429 0.1 0.698361 0.116889 0.300629 + 100 0.9 0.111575 0.732787 0.0826366 0.309837 + 150 0.9 0.111575 0.732787 0.0826366 0.31808 + 200 0.9 0.111575 0.732787 0.0826366 0.317575 +``` + +Results on dataset PAH: +``` + cycle_bound accur_test std_test accur_train std_train k_time +------------- ------------ ---------- ------------- ----------- -------- + 0 0.61 0.113578 0.629762 0.0135212 0.521801 + 10 0.61 0.113578 0.629762 0.0135212 0.52589 + 20 0.61 0.113578 0.629762 0.0135212 0.548528 + 30 0.64 0.111355 0.633333 0.0157935 0.535311 + 40 0.64 0.111355 0.633333 0.0157935 0.61764 + 50 0.67 0.09 0.658333 0.0345238 0.733868 + 60 0.68 0.107703 0.671429 0.0365769 0.871147 + 70 0.67 0.100499 0.666667 0.0380208 1.12625 + 80 0.78 0.107703 0.709524 0.0588534 1.19828 + 90 0.78 0.107703 0.709524 0.0588534 1.21182 +``` diff --git a/pygraph/kernels/spKernel.py b/pygraph/kernels/spKernel.py index 0b2c024..e4ccd32 100644 --- a/pygraph/kernels/spKernel.py +++ b/pygraph/kernels/spKernel.py @@ -1,3 +1,8 @@ +""" +@author: linlin +@references: Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. +""" + import sys import pathlib sys.path.insert(0, "../") @@ -12,7 +17,7 @@ from pygraph.utils.utils import getSPGraph def spkernel(*args, edge_weight = 'bond_type'): """Calculate shortest-path kernels between graphs. - + Parameters ---------- Gn : List of NetworkX graph @@ -22,51 +27,33 @@ def spkernel(*args, edge_weight = 'bond_type'): 2 graphs between which the kernel is calculated. edge_weight : string edge attribute corresponding to the edge weight. The default edge weight is bond_type. - + Return ------ Kmatrix/kernel : Numpy matrix/float Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP kernel between 2 graphs. 
- - References - ---------- - [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE. """ - if len(args) == 1: # for a list of graphs - Gn = args[0] - - Kmatrix = np.zeros((len(Gn), len(Gn))) - - Sn = [] # get shortest path graphs of Gn - for i in range(0, len(Gn)): - Sn.append(getSPGraph(Gn[i], edge_weight = edge_weight)) + Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list + Kmatrix = np.zeros((len(Gn), len(Gn))) + + start_time = time.time() + + Gn = [ getSPGraph(G, edge_weight = edge_weight) for G in args[0] ] # get shortest path graphs of Gn + + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + # kernel_t = [ e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])) \ + # for e1 in Sn[i].edges(data = True) for e2 in Sn[j].edges(data = True) ] + # Kmatrix[i][j] = np.sum(kernel_t) + # Kmatrix[j][i] = Kmatrix[i][j] - start_time = time.time() - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - for e1 in Sn[i].edges(data = True): - for e2 in Sn[j].edges(data = True): - if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - Kmatrix[i][j] += 1 - Kmatrix[j][i] += (0 if i == j else 1) + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] - run_time = time.time() - start_time - print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time)) - - return Kmatrix, run_time - - else: # for only 2 graphs - G1 = getSPGraph(args[0], edge_weight = edge_weight) - G2 = getSPGraph(args[1], edge_weight = edge_weight) - - kernel = 0 - - 
start_time = time.time() - for e1 in G1.edges(data = True): - for e2 in G2.edges(data = True): - if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): - kernel += 1 + run_time = time.time() - start_time + print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time)) -# print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time)) - - return kernel \ No newline at end of file + return Kmatrix, run_time \ No newline at end of file diff --git a/pygraph/kernels/treePatternKernel.py b/pygraph/kernels/treePatternKernel.py new file mode 100644 index 0000000..34815f3 --- /dev/null +++ b/pygraph/kernels/treePatternKernel.py @@ -0,0 +1,198 @@ +""" +@author: linlin +@references: Pierre Mahé and Jean-Philippe Vert. Graph kernels based on tree patterns for molecules. Machine learning, 75(1):3–35, 2009. +""" + +import sys +import pathlib +sys.path.insert(0, "../") +import time + +from collections import Counter + +import networkx as nx +import numpy as np + + +def treepatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True, kernel_type = 'untiln', lmda = 1, h = 1): + """Calculate tree pattern graph kernels between graphs. + Parameters + ---------- + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + / + G1, G2 : NetworkX graphs + 2 graphs between which the kernel is calculated. + node_label : string + node attribute used as label. The default node label is atom. + edge_label : string + edge attribute used as label. The default edge label is bond_type. + labeled : boolean + Whether the graphs are labeled. The default is True. + depth : integer + Depth of search. Longest length of paths. + k_func : function + A kernel function used using different notions of fingerprint similarity. 
+ + Return + ------ + Kmatrix: Numpy matrix + Kernel matrix, each element of which is the tree pattern graph kernel between 2 praphs. + """ + if h < 1: + raise Exception('h > 0 is requested.') + kernel_type = kernel_type.lower() + Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list + Kmatrix = np.zeros((len(Gn), len(Gn))) + h = int(h) + + start_time = time.time() + + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + Kmatrix[i][j] = _treepatternkernel_do(Gn[i], Gn[j], node_label, edge_label, labeled, kernel_type, lmda, h) + Kmatrix[j][i] = Kmatrix[i][j] + + run_time = time.time() - start_time + print("\n --- kernel matrix of tree pattern kernel of size %d built in %s seconds ---" % (len(Gn), run_time)) + + return Kmatrix, run_time + + +def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type, lmda, h): + """Calculate tree pattern graph kernels between 2 graphs. + + Parameters + ---------- + paths1, paths2 : list + List of paths in 2 graphs, where for unlabeled graphs, each path is represented by a list of nodes; while for labeled graphs, each path is represented by a string consists of labels of nodes and edges on that path. + k_func : function + A kernel function used using different notions of fingerprint similarity. + node_label : string + node attribute used as label. The default node label is atom. + edge_label : string + edge attribute used as label. The default edge label is bond_type. + labeled : boolean + Whether the graphs are labeled. The default is True. + + Return + ------ + kernel : float + Treelet Kernel between 2 graphs. + """ + + def matchingset(n1, n2): + """Get neiborhood matching set of two nodes in two graphs. + """ + + def mset_com(allpairs, length): + """Find all sets R of pairs by combination. 
+ """ + if length == 1: + mset = [ [pair] for pair in allpairs ] + return mset, mset + else: + mset, mset_l = mset_com(allpairs, length - 1) + mset_tmp = [] + for pairset in mset_l: # for each pair set of length l-1 + nodeset1 = [ pair[0] for pair in pairset ] # nodes already in the set + nodeset2 = [ pair[1] for pair in pairset ] + for pair in allpairs: + if (pair[0] not in nodeset1) and (pair[1] not in nodeset2): # nodes in R should be unique + mset_tmp.append(pairset + [pair]) # add this pair to the pair set of length l-1, constructing a new set of length l + nodeset1.append(pair[0]) + nodeset2.append(pair[1]) + + mset.extend(mset_tmp) + + return mset, mset_tmp + + + allpairs = [] # all pairs those have the same node labels and edge labels + for neighbor1 in G1[n1]: + for neighbor2 in G2[n2]: + if G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label] \ + and G1[n1][neighbor1][edge_label] == G2[n2][neighbor2][edge_label]: + allpairs.append([neighbor1, neighbor2]) + + if allpairs != []: + mset, _ = mset_com(allpairs, len(allpairs)) + else: + mset = [] + + return mset + + + def kernel_h(h): + """Calculate kernel of h-th iteration. + """ + + if kernel_type == 'untiln': + all_kh = { str(n1) + '.' + str(n2) : (G1.node[n1][node_label] == G2.node[n2][node_label]) \ + for n1 in G1.nodes() for n2 in G2.nodes() } # kernels between all pair of nodes with h = 1 ] + all_kh_tmp = all_kh.copy() + for i in range(2, h + 1): + for n1 in G1.nodes(): + for n2 in G2.nodes(): + kh = 0 + mset = all_msets[str(n1) + '.' + str(n2)] + for R in mset: + kh_tmp = 1 + for pair in R: + kh_tmp *= lmda * all_kh[str(pair[0]) + '.' + str(pair[1])] + kh += 1 / lmda * kh_tmp + kh = (G1.node[n1][node_label] == G2.node[n2][node_label]) * (1 + kh) + all_kh_tmp[str(n1) + '.' + str(n2)] = kh + all_kh = all_kh_tmp.copy() + + elif kernel_type == 'size': + all_kh = { str(n1) + '.' 
+ str(n2) : lmda * (G1.node[n1][node_label] == G2.node[n2][node_label]) \ + for n1 in G1.nodes() for n2 in G2.nodes() } # kernels between all pair of nodes with h = 1 ] + all_kh_tmp = all_kh.copy() + for i in range(2, h + 1): + for n1 in G1.nodes(): + for n2 in G2.nodes(): + kh = 0 + mset = all_msets[str(n1) + '.' + str(n2)] + for R in mset: + kh_tmp = 1 + for pair in R: + kh_tmp *= lmda * all_kh[str(pair[0]) + '.' + str(pair[1])] + kh += kh_tmp + kh *= lmda * (G1.node[n1][node_label] == G2.node[n2][node_label]) + all_kh_tmp[str(n1) + '.' + str(n2)] = kh + all_kh = all_kh_tmp.copy() + + elif kernel_type == 'branching': + all_kh = { str(n1) + '.' + str(n2) : (G1.node[n1][node_label] == G2.node[n2][node_label]) \ + for n1 in G1.nodes() for n2 in G2.nodes() } # kernels between all pair of nodes with h = 1 ] + all_kh_tmp = all_kh.copy() + for i in range(2, h + 1): + for n1 in G1.nodes(): + for n2 in G2.nodes(): + kh = 0 + mset = all_msets[str(n1) + '.' + str(n2)] + for R in mset: + kh_tmp = 1 + for pair in R: + kh_tmp *= lmda * all_kh[str(pair[0]) + '.' + str(pair[1])] + kh += 1 / lmda * kh_tmp + kh *= (G1.node[n1][node_label] == G2.node[n2][node_label]) + all_kh_tmp[str(n1) + '.' + str(n2)] = kh + all_kh = all_kh_tmp.copy() + + return all_kh + + + + # calculate matching sets for every pair of nodes at first to avoid calculating in every iteration. + all_msets = ({ str(node1) + '.' + str(node2) : matchingset(node1, node2) for node1 in G1.nodes() \ + for node2 in G2.nodes() } if h > 1 else {}) + + all_kh = kernel_h(h) + kernel = sum(all_kh.values()) + + if kernel_type == 'size': + kernel = kernel / (lmda ** h) + + return kernel diff --git a/pygraph/kernels/treeletKernel.py b/pygraph/kernels/treeletKernel.py index 073150a..d364cc3 100644 --- a/pygraph/kernels/treeletKernel.py +++ b/pygraph/kernels/treeletKernel.py @@ -1,3 +1,8 @@ +""" +@author: linlin +@references: Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 
2012 Nov 1;33(15):2038-47. +""" + import sys import pathlib sys.path.insert(0, "../") @@ -12,7 +17,7 @@ import numpy as np def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True): """Calculate treelet graph kernels between graphs. - + Parameters ---------- Gn : List of NetworkX graph @@ -26,7 +31,7 @@ def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled edge attribute used as label. The default edge label is bond_type. labeled : boolean Whether the graphs are labeled. The default is True. - + Return ------ Kmatrix/kernel : Numpy matrix/float @@ -37,11 +42,11 @@ def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled Kmatrix = np.zeros((len(Gn), len(Gn))) start_time = time.time() - + # get all canonical keys of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset. canonkeys = [ get_canonkeys(Gn[i], node_label = node_label, edge_label = edge_label, labeled = labeled) \ for i in range(0, len(Gn)) ] - + for i in range(0, len(Gn)): for j in range(i, len(Gn)): Kmatrix[i][j] = _treeletkernel_do(canonkeys[i], canonkeys[j], node_label = node_label, edge_label = edge_label, labeled = labeled) @@ -49,7 +54,7 @@ def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled run_time = time.time() - start_time print("\n --- treelet kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time)) - + return Kmatrix, run_time else: # for only 2 graphs @@ -112,10 +117,6 @@ def get_canonkeys(G, node_label = 'atom', edge_label = 'bond_type', labeled = Tr ------ canonkey/canonkey_l : dict For unlabeled graphs, canonkey is a dictionary which records amount of every tree pattern. For labeled graphs, canonkey_l is one which keeps track of amount of every treelet. - - References - ---------- - [1] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47. 
""" patterns = {} # a dictionary which consists of lists of patterns for all graphlet. canonkey = {} # canonical key, a dictionary which records amount of every tree pattern. @@ -126,7 +127,7 @@ def get_canonkeys(G, node_label = 'atom', edge_label = 'bond_type', labeled = Tr # linear patterns patterns['0'] = G.nodes() canonkey['0'] = nx.number_of_nodes(G) - for i in range(1, 6): + for i in range(1, 6): # for i in range(1, 6): patterns[str(i)] = find_all_paths(G, i) canonkey[str(i)] = len(patterns[str(i)]) @@ -227,7 +228,7 @@ def get_canonkeys(G, node_label = 'atom', edge_label = 'bond_type', labeled = Tr for key in canonkey_t: canonkey_l['0' + key] = canonkey_t[key] - for i in range(1, 6): + for i in range(1, 6): # for i in range(1, 6): treelet = [] for pattern in patterns[str(i)]: canonlist = list(chain.from_iterable((G.node[node][node_label], \ @@ -378,4 +379,4 @@ def find_all_paths(G, length): all_paths[idx] = [] break - return list(filter(lambda a: a != [], all_paths)) \ No newline at end of file + return list(filter(lambda a: a != [], all_paths)) diff --git a/pygraph/kernels/untildPathKernel.py b/pygraph/kernels/untildPathKernel.py index 8b91536..09e47fb 100644 --- a/pygraph/kernels/untildPathKernel.py +++ b/pygraph/kernels/untildPathKernel.py @@ -1,3 +1,8 @@ +""" +@author: linlin +@references: Liva Ralaivola, Sanjay J Swamidass, Hiroto Saigo, and Pierre Baldi. Graph kernels for chemical informatics. Neural networks, 18(8):1093–1110, 2005. +""" + import sys import pathlib sys.path.insert(0, "../") @@ -40,7 +45,7 @@ def untildpathkernel(*args, node_label = 'atom', edge_label = 'bond_type', label Kmatrix = np.zeros((len(Gn), len(Gn))) start_time = time.time() - + # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset. 
all_paths = [ find_all_paths_until_length(Gn[i], depth, node_label = node_label, edge_label = edge_label, labeled = labeled) for i in range(0, len(Gn)) ] @@ -187,7 +192,7 @@ def find_all_paths(G, length): all_paths = [] for node in G: all_paths.extend(find_paths(G, node, length)) - + ### The following process is not carried out according to the original article # all_paths_r = [ path[::-1] for path in all_paths ] @@ -200,4 +205,4 @@ def find_all_paths(G, length): # break # return list(filter(lambda a: a != [], all_paths)) - return all_paths \ No newline at end of file + return all_paths diff --git a/pygraph/kernels/weisfeilerLehmanKernel.py b/pygraph/kernels/weisfeilerLehmanKernel.py index e2d2bd2..8c520a5 100644 --- a/pygraph/kernels/weisfeilerLehmanKernel.py +++ b/pygraph/kernels/weisfeilerLehmanKernel.py @@ -1,13 +1,8 @@ -import sys -import pathlib -sys.path.insert(0, "../") - -import networkx as nx -import numpy as np -import time - -from pygraph.kernels.spkernel import spkernel -from pygraph.kernels.pathKernel import pathkernel +""" +@author: linlin +@references: + [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61. +""" import sys import pathlib @@ -18,7 +13,6 @@ import networkx as nx import numpy as np import time -from pygraph.kernels.spkernel import spkernel from pygraph.kernels.pathKernel import pathkernel def weisfeilerlehmankernel(*args, node_label = 'atom', edge_label = 'bond_type', height = 0, base_kernel = 'subtree'): @@ -38,97 +32,66 @@ def weisfeilerlehmankernel(*args, node_label = 'atom', edge_label = 'bond_type', height : int subtree height base_kernel : string - base kernel used in each iteration of WL kernel. The default base kernel is subtree kernel. - + base kernel used in each iteration of WL kernel. The default base kernel is subtree kernel. 
For user-defined kernel, base_kernel is the name of the base kernel function used in each iteration of WL kernel. This function returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 praphs. + Return ------ - Kmatrix/kernel : Numpy matrix/float - Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. / Weisfeiler-Lehman kernel between 2 graphs. - + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. + Notes ----- - This function now supports WL subtree kernel and WL shortest path kernel. - - References - ---------- - [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61. + This function now supports WL subtree kernel, WL shortest path kernel and WL edge kernel. """ - if len(args) == 1: # for a list of graphs - start_time = time.time() - - # for WL subtree kernel - if base_kernel == 'subtree': - Kmatrix = _wl_subtreekernel_do(args[0], node_label, edge_label, height = height, base_kernel = 'subtree') - - # for WL edge kernel - elif base_kernel == 'edge': - print('edge') - - # for WL shortest path kernel - elif base_kernel == 'sp': - Gn = args[0] - Kmatrix = np.zeros((len(Gn), len(Gn))) - - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - Kmatrix[i][j] = _weisfeilerlehmankernel_do(Gn[i], Gn[j], height = height) - Kmatrix[j][i] = Kmatrix[i][j] + base_kernel = base_kernel.lower() + Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list + Kmatrix = np.zeros((len(Gn), len(Gn))) - run_time = time.time() - start_time - print("\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---" % (base_kernel, len(args[0]), run_time)) - - return Kmatrix, run_time - - else: # for only 2 graphs - - start_time = time.time() - - # for WL subtree kernel - if base_kernel == 'subtree': - 
- args = [args[0], args[1]] - kernel = _wl_subtreekernel_do(args, node_label, edge_label, height = height, base_kernel = 'subtree') - - # for WL edge kernel - elif base_kernel == 'edge': - print('edge') - - # for WL shortest path kernel - elif base_kernel == 'sp': - + start_time = time.time() - kernel = _pathkernel_do(args[0], args[1]) + # for WL subtree kernel + if base_kernel == 'subtree': + Kmatrix = _wl_subtreekernel_do(args[0], node_label, edge_label, height) - run_time = time.time() - start_time - print("\n --- Weisfeiler-Lehman %s kernel built in %s seconds ---" % (base_kernel, run_time)) - - return kernel, run_time - - -def _wl_subtreekernel_do(*args, node_label = 'atom', edge_label = 'bond_type', height = 0, base_kernel = 'subtree'): + # for WL shortest path kernel + elif base_kernel == 'sp': + Kmatrix = _wl_spkernel_do(args[0], node_label, edge_label, height) + + # for WL edge kernel + elif base_kernel == 'edge': + Kmatrix = _wl_edgekernel_do(args[0], node_label, edge_label, height) + + # for user defined base kernel + else: + Kmatrix = _wl_userkernel_do(args[0], node_label, edge_label, height, base_kernel) + + run_time = time.time() - start_time + print("\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---" % (base_kernel, len(args[0]), run_time)) + + return Kmatrix, run_time + + + +def _wl_subtreekernel_do(Gn, node_label, edge_label, height): """Calculate Weisfeiler-Lehman subtree kernels between graphs. - + Parameters ---------- Gn : List of NetworkX graph List of graphs between which the kernels are calculated. node_label : string - node attribute used as label. The default node label is atom. + node attribute used as label. edge_label : string - edge attribute used as label. The default edge label is bond_type. + edge attribute used as label. height : int - subtree height - base_kernel : string - base kernel used in each iteration of WL kernel. The default base kernel is subtree kernel. - + subtree height. 
+ Return ------ - Kmatrix/kernel : Numpy matrix/float + Kmatrix : Numpy matrix Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. """ - height = int(height) - Gn = args[0] Kmatrix = np.zeros((len(Gn), len(Gn))) all_num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs @@ -148,9 +111,9 @@ def _wl_subtreekernel_do(*args, node_label = 'atom', edge_label = 'bond_type', h num_of_labels = len(num_of_each_label) # number of all unique labels all_labels_ori.update(labels_ori) - + all_num_of_labels_occured += len(all_labels_ori) - + # calculate subtree kernel with the 0th iteration and add it to the final kernel for i in range(0, len(Gn)): for j in range(i, len(Gn)): @@ -159,17 +122,17 @@ def _wl_subtreekernel_do(*args, node_label = 'atom', edge_label = 'bond_type', h vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ]) Kmatrix[i][j] += np.dot(vector1, vector2.transpose()) Kmatrix[j][i] = Kmatrix[i][j] - + # iterate each height for h in range(1, height + 1): all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs all_labels_ori = set() all_num_of_each_label = [] - + # for each graph for idx, G in enumerate(Gn): - + set_multisets = [] for node in G.nodes(data = True): # Multiset-label determination. 
@@ -190,9 +153,9 @@ def _wl_subtreekernel_do(*args, node_label = 'atom', edge_label = 'bond_type', h else: set_compressed.update({ value : str(num_of_labels_occured + 1) }) num_of_labels_occured += 1 - + all_set_compressed.update(set_compressed) - + # relabel nodes for node in G.nodes(data = True): node[1][node_label] = set_compressed[set_multisets[node[0]]] @@ -202,9 +165,9 @@ def _wl_subtreekernel_do(*args, node_label = 'atom', edge_label = 'bond_type', h all_labels_ori.update(labels_comp) num_of_each_label = dict(Counter(labels_comp)) all_num_of_each_label.append(num_of_each_label) - + all_num_of_labels_occured += len(all_labels_ori) - + # calculate subtree kernel with h iterations and add it to the final kernel for i in range(0, len(Gn)): for j in range(i, len(Gn)): @@ -213,87 +176,228 @@ def _wl_subtreekernel_do(*args, node_label = 'atom', edge_label = 'bond_type', h vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ]) Kmatrix[i][j] += np.dot(vector1, vector2.transpose()) Kmatrix[j][i] = Kmatrix[i][j] - + return Kmatrix - - -def _weisfeilerlehmankernel_do(G1, G2, height = 0): - """Calculate Weisfeiler-Lehman kernels between 2 graphs. This kernel use shortest path kernel to calculate kernel between two graphs in each iteration. + + +def _wl_spkernel_do(Gn, node_label, edge_label, height): + """Calculate Weisfeiler-Lehman shortest path kernels between graphs. Parameters ---------- - G1, G2 : NetworkX graphs - 2 graphs between which the kernel is calculated. + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + node_label : string + node attribute used as label. + edge_label : string + edge attribute used as label. + height : int + subtree height. Return ------ - kernel : float - Weisfeiler-Lehman kernel between 2 graphs. + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. 
""" - + from pygraph.utils.utils import getSPGraph + # init. height = int(height) - kernel = 0 # init kernel - num_nodes1 = G1.number_of_nodes() - num_nodes2 = G2.number_of_nodes() - - # the first iteration. -# labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) } -# labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) } - kernel += spkernel(G1, G2) # change your base kernel here (and one more below) + Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel + + Gn = [ getSPGraph(G, edge_weight = edge_label) for G in Gn ] # get shortest path graphs of Gn - for h in range(0, height + 1): -# if labelset1 != labelset2: -# break + # initial for height = 0 + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] + + # iterate each height + for h in range(1, height + 1): + all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration + num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs + for G in Gn: # for each graph + set_multisets = [] + for node in G.nodes(data = True): + # Multiset-label determination. + multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] + # sorting each multiset + multiset.sort() + multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix + set_multisets.append(multiset) + + # label compression + set_unique = list(set(set_multisets)) # set of unique multiset labels + # a dictionary mapping original labels to new ones. 
+ set_compressed = {} + # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label + for value in set_unique: + if value in all_set_compressed.keys(): + set_compressed.update({ value : all_set_compressed[value] }) + else: + set_compressed.update({ value : str(num_of_labels_occured + 1) }) + num_of_labels_occured += 1 + + all_set_compressed.update(set_compressed) + + # relabel nodes + for node in G.nodes(data = True): + node[1][node_label] = set_compressed[set_multisets[node[0]]] + + # calculate subtree kernel with h iterations and add it to the final kernel + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] + + return Kmatrix - # Weisfeiler-Lehman test of graph isomorphism. - relabel(G1) - relabel(G2) - # calculate kernel - kernel += spkernel(G1, G2) # change your base kernel here (and one more before) - # get label sets of both graphs -# labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) } -# labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) } +def _wl_edgekernel_do(Gn, node_label, edge_label, height): + """Calculate Weisfeiler-Lehman edge kernels between graphs. - return kernel + Parameters + ---------- + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + node_label : string + node attribute used as label. + edge_label : string + edge attribute used as label. + height : int + subtree height. + + Return + ------ + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. + """ + # init. 
+ height = int(height) + Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel + + # initial for height = 0 + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] + + # iterate each height + for h in range(1, height + 1): + all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration + num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs + for G in Gn: # for each graph + set_multisets = [] + for node in G.nodes(data = True): + # Multiset-label determination. + multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] + # sorting each multiset + multiset.sort() + multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix + set_multisets.append(multiset) + # label compression + set_unique = list(set(set_multisets)) # set of unique multiset labels + # a dictionary mapping original labels to new ones. + set_compressed = {} + # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label + for value in set_unique: + if value in all_set_compressed.keys(): + set_compressed.update({ value : all_set_compressed[value] }) + else: + set_compressed.update({ value : str(num_of_labels_occured + 1) }) + num_of_labels_occured += 1 -def relabel(G): - ''' - Relabel nodes in graph G in one iteration of the 1-dim. WL test of graph isomorphism. 
+ all_set_compressed.update(set_compressed) + + # relabel nodes + for node in G.nodes(data = True): + node[1][node_label] = set_compressed[set_multisets[node[0]]] + + # calculate subtree kernel with h iterations and add it to the final kernel + for i in range(0, len(Gn)): + for j in range(i, len(Gn)): + for e1 in Gn[i].edges(data = True): + for e2 in Gn[j].edges(data = True): + if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): + Kmatrix[i][j] += 1 + Kmatrix[j][i] = Kmatrix[i][j] + + return Kmatrix + + +def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): + """Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. Parameters ---------- - G : NetworkX graph - The graphs whose nodes are relabeled. - ''' - - # get the set of original labels - labels_ori = list(nx.get_node_attributes(G, 'label').values()) - num_of_each_label = dict(Counter(labels_ori)) - num_of_labels = len(num_of_each_label) - - set_multisets = [] - for node in G.nodes(data = True): - # Multiset-label determination. - multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ] - # sorting each multiset - multiset.sort() - multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix - set_multisets.append(multiset) + Gn : List of NetworkX graph + List of graphs between which the kernels are calculated. + node_label : string + node attribute used as label. + edge_label : string + edge attribute used as label. + height : int + subtree height. + base_kernel : string + Name of the base kernel function used in each iteration of WL kernel. This function returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 praphs. 
- # label compression -# set_multisets.sort() # this is unnecessary - set_unique = list(set(set_multisets)) # set of unique multiset labels - set_compressed = { value : str(set_unique.index(value) + num_of_labels + 1) for value in set_unique } # assign new labels - - # relabel nodes -# nx.relabel_nodes(G, set_compressed, copy = False) - for node in G.nodes(data = True): - node[1]['label'] = set_compressed[set_multisets[node[0]]] - - # get the set of compressed labels - labels_comp = list(nx.get_node_attributes(G, 'label').values()) - num_of_each_label.update(dict(Counter(labels_comp))) + Return + ------ + Kmatrix : Numpy matrix + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. + """ + # init. + height = int(height) + Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel + + # initial for height = 0 + Kmatrix = base_kernel(Gn, node_label, edge_label) + + # iterate each height + for h in range(1, height + 1): + all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration + num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs + for G in Gn: # for each graph + set_multisets = [] + for node in G.nodes(data = True): + # Multiset-label determination. + multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ] + # sorting each multiset + multiset.sort() + multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix + set_multisets.append(multiset) + + # label compression + set_unique = list(set(set_multisets)) # set of unique multiset labels + # a dictionary mapping original labels to new ones. 
+ set_compressed = {} + # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label + for value in set_unique: + if value in all_set_compressed.keys(): + set_compressed.update({ value : all_set_compressed[value] }) + else: + set_compressed.update({ value : str(num_of_labels_occured + 1) }) + num_of_labels_occured += 1 + + all_set_compressed.update(set_compressed) + + # relabel nodes + for node in G.nodes(data = True): + node[1][node_label] = set_compressed[set_multisets[node[0]]] + + # calculate kernel with h iterations and add it to the final kernel + Kmatrix += base_kernel(Gn, node_label, edge_label) + + return Kmatrix diff --git a/pygraph/utils/__pycache__/graphfiles.cpython-35.pyc b/pygraph/utils/__pycache__/graphfiles.cpython-35.pyc index 5a901fd..cf561df 100644 Binary files a/pygraph/utils/__pycache__/graphfiles.cpython-35.pyc and b/pygraph/utils/__pycache__/graphfiles.cpython-35.pyc differ diff --git a/pygraph/utils/__pycache__/utils.cpython-35.pyc b/pygraph/utils/__pycache__/utils.cpython-35.pyc index 0710a16..35fdb36 100644 Binary files a/pygraph/utils/__pycache__/utils.cpython-35.pyc and b/pygraph/utils/__pycache__/utils.cpython-35.pyc differ diff --git a/pygraph/utils/graphfiles.py b/pygraph/utils/graphfiles.py index 32cdace..ef43165 100644 --- a/pygraph/utils/graphfiles.py +++ b/pygraph/utils/graphfiles.py @@ -3,7 +3,7 @@ def loadCT(filename): """load data from .ct file. 
- +nn Notes ------ a typical example of data in .ct is like this: @@ -33,12 +33,17 @@ def loadCT(filename): tmp = content[i + 2].split(" ") tmp = [x for x in tmp if x != ''] g.add_node(i, atom=tmp[3], label=tmp[3]) - for i in range(0, nb_edges): - tmp = content[i + g.number_of_nodes() + 2] - tmp = [tmp[i:i+3] for i in range(0, len(tmp), 3)] + tmp = content[i + g.number_of_nodes() + 2].split(" ") + tmp = [x for x in tmp if x != ''] g.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, - bond_type=tmp[3].strip(), label=tmp[3].strip()) + bond_type=tmp[3].strip(), label=tmp[3].strip()) + +# for i in range(0, nb_edges): +# tmp = content[i + g.number_of_nodes() + 2] +# tmp = [tmp[i:i+3] for i in range(0, len(tmp), 3)] +# g.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, +# bond_type=tmp[3].strip(), label=tmp[3].strip()) return g @@ -101,7 +106,57 @@ def saveGXL(graph, filename): tree.write(filename) -def loadDataset(filename): +def loadSDF(filename): + """load data from structured data file (.sdf file). + + Notes + ------ + A SDF file contains a group of molecules, represented in the similar way as in MOL format. + see http://www.nonlinear.com/progenesis/sdf-studio/v0.9/faq/sdf-file-format-guidance.aspx, 2018 for detailed structure. 
+ """ + import networkx as nx + from os.path import basename + from tqdm import tqdm + import sys + data = [] + with open(filename) as f: + content = f.read().splitlines() + index = 0 + pbar = tqdm(total = len(content) + 1, desc = 'load SDF', file=sys.stdout) + while index < len(content): + index_old = index + + g = nx.Graph(name=content[index].strip()) # set name of the graph + + tmp = content[index + 3] + nb_nodes = int(tmp[:3]) # number of the nodes + nb_edges = int(tmp[3:6]) # number of the edges + + for i in range(0, nb_nodes): + tmp = content[i + index + 4] + g.add_node(i, atom=tmp[31:34].strip()) + + for i in range(0, nb_edges): + tmp = content[i + index + g.number_of_nodes() + 4] + tmp = [tmp[i:i+3] for i in range(0, len(tmp), 3)] + g.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, bond_type=tmp[2].strip()) + + data.append(g) + + index += 4 + g.number_of_nodes() + g.number_of_edges() + while content[index].strip() != '$$$$': # seperator + index += 1 + index += 1 + + pbar.update(index - index_old) + pbar.update(1) + pbar.close() + + return data + + + +def loadDataset(filename, filename_y = ''): """load file list of the dataset. 
""" from os.path import dirname, splitext @@ -128,5 +183,28 @@ def loadDataset(filename): mol_class = graph.attrib['class'] data.append(loadGXL(dirname_dataset + '/' + mol_filename)) y.append(mol_class) + elif extension == "sdf": + import numpy as np + from tqdm import tqdm + import sys + + data = loadSDF(filename) + + y_raw = open(filename_y).read().splitlines() + y_raw.pop(0) + tmp0 = [] + tmp1 = [] + for i in range(0, len(y_raw)): + tmp = y_raw[i].split(',') + tmp0.append(tmp[0]) + tmp1.append(tmp[1].strip()) + + y = [] + for i in tqdm(range(0, len(data)), desc = 'ajust data', file=sys.stdout): + try: + y.append(tmp1[tmp0.index(data[i].name)].strip()) + except ValueError: # if data[i].name not in tmp0 + data[i] = [] + data = list(filter(lambda a: a != [], data)) return data, y diff --git a/pygraph/utils/utils.py b/pygraph/utils/utils.py index 1dbb584..1551e84 100644 --- a/pygraph/utils/utils.py +++ b/pygraph/utils/utils.py @@ -1,5 +1,6 @@ import networkx as nx import numpy as np +from tqdm import tqdm def getSPLengths(G1): @@ -58,21 +59,15 @@ def floydTransformation(G, edge_weight = 'bond_type'): S = nx.Graph() S.add_nodes_from(G.nodes(data=True)) for i in range(0, G.number_of_nodes()): - for j in range(0, G.number_of_nodes()): + for j in range(i, G.number_of_nodes()): S.add_edge(i, j, cost = spMatrix[i, j]) return S -import os -import pathlib -from collections import OrderedDict -from tabulate import tabulate -from .graphfiles import loadDataset - -def kernel_train_test(datafile, kernel_file_path, kernel_func, kernel_para, trials = 100, splits = 10, alpha_grid = None, C_grid = None, hyper_name = '', hyper_range = [1], normalize = False): +def kernel_train_test(datafile, kernel_file_path, kernel_func, kernel_para, trials = 100, splits = 10, alpha_grid = None, C_grid = None, hyper_name = '', hyper_range = [1], normalize = False, datafile_y = '', model_type = 'regression'): """Perform training and testing for a kernel method. 
Print out neccessary data during the process then finally the results. - + Parameters ---------- datafile : string @@ -96,12 +91,14 @@ def kernel_train_test(datafile, kernel_file_path, kernel_func, kernel_para, tria hyper_range : list Range of the hyperparameter. normalize : string - Determine whether or not that normalization is performed. The default is False. + Determine whether or not that normalization is performed. Only works when model_type == 'regression'. The default is False. + model_type : string + Typr of the problem, regression or classification problem References ---------- [1] Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py, 2018.1 - + Examples -------- >>> import sys @@ -113,29 +110,41 @@ def kernel_train_test(datafile, kernel_file_path, kernel_func, kernel_para, tria >>> kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True) >>> kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = True) """ + import os + import pathlib + from collections import OrderedDict + from tabulate import tabulate + from .graphfiles import loadDataset + # setup the parameters - model_type = 'regression' # Regression or classification problem + model_type = model_type.lower() + if model_type != 'regression' and model_type != 'classification': + raise Exception('The model type is incorrect! 
Please choose from regression or clqssification.') print('\n --- This is a %s problem ---' % model_type) - + alpha_grid = np.logspace(-10, 10, num = trials, base = 10) if alpha_grid == None else alpha_grid # corresponds to (2*C)^-1 in other linear models such as LogisticRegression C_grid = np.logspace(-10, 10, num = trials, base = 10) if C_grid == None else C_grid - + if not os.path.exists(kernel_file_path): os.makedirs(kernel_file_path) - + train_means_list = [] train_stds_list = [] test_means_list = [] test_stds_list = [] kernel_time_list = [] - + for hyper_para in hyper_range: - print('' if hyper_name == '' else '\n\n #--- calculating kernel matrix when %s = %.1f ---#' % (hyper_name, hyper_para)) + print('' if hyper_name == '' else '\n\n #--- calculating kernel matrix when', hyper_name, '=', hyper_para, '---#') print('\n Loading dataset from file...') - dataset, y = loadDataset(datafile) + dataset, y = loadDataset(datafile, filename_y = datafile_y) y = np.array(y) -# print(y) + # normalize labels and transform non-numerical labels to numerical labels. 
+ if model_type == 'classification': + from sklearn.preprocessing import LabelEncoder + y = LabelEncoder().fit_transform(y) + # print(y) # save kernel matrices to files / read kernel matrices from files kernel_file = kernel_file_path + 'km.ds' @@ -152,7 +161,7 @@ def kernel_train_test(datafile, kernel_file_path, kernel_func, kernel_para, tria Kmatrix, run_time = kernel_func(dataset, **kernel_para) kernel_time_list.append(run_time) print(Kmatrix) - print('\n Saving kernel matrix to file...') + # print('\n Saving kernel matrix to file...') # np.savetxt(kernel_file, Kmatrix) """ @@ -170,25 +179,29 @@ def kernel_train_test(datafile, kernel_file_path, kernel_func, kernel_para, tria test_stds_list.append(test_std) print('\n') - table_dict = {'rmse_test': test_means_list, 'std_test': test_stds_list, \ - 'rmse_train': train_means_list, 'std_train': train_stds_list, 'k_time': kernel_time_list} - if hyper_name == '': - keyorder = ['rmse_test', 'std_test', 'rmse_train', 'std_train', 'k_time'] - - else: - table_dict[hyper_name] = hyper_range - keyorder = [hyper_name, 'rmse_test', 'std_test', 'rmse_train', 'std_train', 'k_time'] + if model_type == 'regression': + table_dict = {'rmse_test': test_means_list, 'std_test': test_stds_list, \ + 'rmse_train': train_means_list, 'std_train': train_stds_list, 'k_time': kernel_time_list} + if hyper_name == '': + keyorder = ['rmse_test', 'std_test', 'rmse_train', 'std_train', 'k_time'] + else: + table_dict[hyper_name] = hyper_range + keyorder = [hyper_name, 'rmse_test', 'std_test', 'rmse_train', 'std_train', 'k_time'] + elif model_type == 'classification': + table_dict = {'accur_test': test_means_list, 'std_test': test_stds_list, \ + 'accur_train': train_means_list, 'std_train': train_stds_list, 'k_time': kernel_time_list} + if hyper_name == '': + keyorder = ['accur_test', 'std_test', 'accur_train', 'std_train', 'k_time'] + else: + table_dict[hyper_name] = hyper_range + keyorder = [hyper_name, 'accur_test', 'std_test', 'accur_train', 
'std_train', 'k_time'] print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys')) -import random -from sklearn.kernel_ridge import KernelRidge # 0.17 -from sklearn.metrics import accuracy_score, mean_squared_error -from sklearn import svm def split_train_test(Kmatrix, train_target, alpha_grid, C_grid, splits = 10, trials = 100, model_type = 'regression', normalize = False): """Split dataset to training and testing splits, train and test. Print out and return the results. - + Parameters ---------- Kmatrix : Numpy matrix @@ -206,8 +219,8 @@ def split_train_test(Kmatrix, train_target, alpha_grid, C_grid, splits = 10, tri model_type : string Determine whether it is a regression or classification problem. The default is 'regression'. normalize : string - Determine whether or not that normalization is performed. The default is False. - + Determine whether or not that normalization is performed. Only works when model_type == 'regression'. The default is False. + Return ------ train_mean : float @@ -218,19 +231,27 @@ def split_train_test(Kmatrix, train_target, alpha_grid, C_grid, splits = 10, tri mean of the best tests. test_std : float mean of test stds in the same trial with the best test mean. 
- + References ---------- [1] Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py, 2018.1 """ + import random + from sklearn.kernel_ridge import KernelRidge # 0.17 + from sklearn.metrics import accuracy_score, mean_squared_error + from sklearn import svm + datasize = len(train_target) random.seed(20) # Set the seed for uniform parameter distribution - + # Initialize the performance of the best parameter trial on train with the corresponding performance on test train_split = [] test_split = [] # For each split of the data + print('\n Starting calculate accuracy/rmse...') + import sys + pbar = tqdm(total = splits * trials, desc = 'calculate performance', file=sys.stdout) for j in range(10, 10 + splits): # print('\n Starting split %d...' % j) @@ -255,7 +276,7 @@ def split_train_test(Kmatrix, train_target, alpha_grid, C_grid, splits = 10, tri # Split the targets y_train = y_perm[0:num_train] - + # Normalization step (for real valued targets only) if normalize == True and model_type == 'regression': @@ -275,7 +296,6 @@ def split_train_test(Kmatrix, train_target, alpha_grid, C_grid, splits = 10, tri if model_type == 'regression': # Fit the kernel ridge model KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i]) - # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i]) KR.fit(Kmatrix_train, y_train if normalize == False else y_train_norm) # predict on the train and test set @@ -284,15 +304,33 @@ def split_train_test(Kmatrix, train_target, alpha_grid, C_grid, splits = 10, tri # adjust prediction: needed because the training targets have been normalized if normalize == True: - y_pred_train = y_pred_train * float(y_train_std) + y_train_mean + y_pred_train = y_pred_train * float(y_train_std) + y_train_mean y_pred_test = y_pred_test * float(y_train_std) + y_train_mean - # root mean squared error in train set - rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train)) - 
perf_all_train.append(rmse_train) - # root mean squared error in test set - rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test)) - perf_all_test.append(rmse_test) + # root mean squared error on train set + accuracy_train = np.sqrt(mean_squared_error(y_train, y_pred_train)) + perf_all_train.append(accuracy_train) + # root mean squared error on test set + accuracy_test = np.sqrt(mean_squared_error(y_test, y_pred_test)) + perf_all_test.append(accuracy_test) + + # For classification use SVM + elif model_type == 'classification': + KR = svm.SVC(kernel = 'precomputed', C = C_grid[i]) + KR.fit(Kmatrix_train, y_train) + + # predict on the train and test set + y_pred_train = KR.predict(Kmatrix_train) + y_pred_test = KR.predict(Kmatrix_test) + + # accuracy on train set + accuracy_train = accuracy_score(y_train, y_pred_train) + perf_all_train.append(accuracy_train) + # accuracy on test set + accuracy_test = accuracy_score(y_test, y_pred_test) + perf_all_test.append(accuracy_test) + + pbar.update(1) # --- FIND THE OPTIMAL PARAMETERS --- # # For regression: minimise the mean squared error @@ -306,6 +344,17 @@ def split_train_test(Kmatrix, train_target, alpha_grid, C_grid, splits = 10, tri perf_train_opt = perf_all_train[min_idx] perf_test_opt = perf_all_test[min_idx] + # For classification: maximise the accuracy + if model_type == 'classification': + # get optimal parameter on test (argmax accuracy) + max_idx = np.argmax(perf_all_test) + C_opt = C_grid[max_idx] + + # corresponding performance on train and test set for the same parameter + perf_train_opt = perf_all_train[max_idx] + perf_test_opt = perf_all_test[max_idx] + + # append the correponding performance on the train and test set train_split.append(perf_train_opt) test_split.append(perf_test_opt) @@ -322,5 +371,5 @@ def split_train_test(Kmatrix, train_target, alpha_grid, C_grid, splits = 10, tri print('With standard deviation: %3f' % train_std) print('\n Mean performance on test set: %3f' % test_mean) print('With 
standard deviation: %3f' % test_std) - - return train_mean, train_std, test_mean, test_std \ No newline at end of file + + return train_mean, train_std, test_mean, test_std diff --git a/run_cyclic.py b/run_cyclic.py new file mode 100644 index 0000000..d7a9c36 --- /dev/null +++ b/run_cyclic.py @@ -0,0 +1,16 @@ +import sys +sys.path.insert(0, "../") +from pygraph.utils.utils import kernel_train_test +from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel + +import numpy as np + +datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf' +datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt' +kernel_file_path = 'kernelmatrices_path_acyclic/' + +kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True) + +kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \ + hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 1000, 21), normalize = False, \ + datafile_y = datafile_y, model_type = 'classification')