You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or number, can include dashes ('-') and can be up to 35 characters long.

run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb 181 kB


  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "name": "stdout",
  10. "output_type": "stream",
  11. "text": [
  12. "\n",
  13. " --- This is a regression problem ---\n",
  14. "\n",
  15. "\n",
  16. " #--- calculating kernel matrix when height = 0.0 ---#\n",
  17. "\n",
  18. " Loading dataset from file...\n",
  19. "\n",
  20. " Calculating kernel matrix, this could take a while...\n",
  21. "\n",
  22. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.38979601860046387 seconds ---\n",
  23. "[[ 5. 6. 4. ..., 20. 20. 20.]\n",
  24. " [ 6. 8. 4. ..., 20. 20. 20.]\n",
  25. " [ 4. 4. 5. ..., 21. 21. 21.]\n",
  26. " ..., \n",
  27. " [ 20. 20. 21. ..., 101. 101. 101.]\n",
  28. " [ 20. 20. 21. ..., 101. 101. 101.]\n",
  29. " [ 20. 20. 21. ..., 101. 101. 101.]]\n",
  30. "\n",
  31. " Saving kernel matrix to file...\n",
  32. "\n",
  33. " Mean performance on train set: 17.681582\n",
  34. "With standard deviation: 0.713183\n",
  35. "\n",
  36. " Mean performance on test set: 15.685879\n",
  37. "With standard deviation: 4.139197\n",
  38. "\n",
  39. "\n",
  40. " #--- calculating kernel matrix when height = 1.0 ---#\n",
  41. "\n",
  42. " Loading dataset from file...\n",
  43. "\n",
  44. " Calculating kernel matrix, this could take a while...\n",
  45. "\n",
  46. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.8205692768096924 seconds ---\n",
  47. "[[ 10. 10. 4. ..., 20. 20. 20.]\n",
  48. " [ 10. 16. 4. ..., 20. 20. 20.]\n",
  49. " [ 4. 4. 10. ..., 22. 22. 24.]\n",
  50. " ..., \n",
  51. " [ 20. 20. 22. ..., 130. 130. 122.]\n",
  52. " [ 20. 20. 22. ..., 130. 130. 122.]\n",
  53. " [ 20. 20. 24. ..., 122. 122. 154.]]\n",
  54. "\n",
  55. " Saving kernel matrix to file...\n",
  56. "\n",
  57. " Mean performance on train set: 6.270014\n",
  58. "With standard deviation: 0.654734\n",
  59. "\n",
  60. " Mean performance on test set: 7.550458\n",
  61. "With standard deviation: 2.331786\n",
  62. "\n",
  63. "\n",
  64. " #--- calculating kernel matrix when height = 2.0 ---#\n",
  65. "\n",
  66. " Loading dataset from file...\n",
  67. "\n",
  68. " Calculating kernel matrix, this could take a while...\n",
  69. "\n",
  70. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.375309705734253 seconds ---\n",
  71. "[[ 15. 10. 4. ..., 20. 20. 20.]\n",
  72. " [ 10. 24. 4. ..., 20. 20. 20.]\n",
  73. " [ 4. 4. 15. ..., 22. 22. 26.]\n",
  74. " ..., \n",
  75. " [ 20. 20. 22. ..., 159. 151. 124.]\n",
  76. " [ 20. 20. 22. ..., 151. 153. 124.]\n",
  77. " [ 20. 20. 26. ..., 124. 124. 185.]]\n",
  78. "\n",
  79. " Saving kernel matrix to file...\n",
  80. "\n",
  81. " Mean performance on train set: 4.450682\n",
  82. "With standard deviation: 0.882129\n",
  83. "\n",
  84. " Mean performance on test set: 9.728466\n",
  85. "With standard deviation: 2.057669\n",
  86. "\n",
  87. "\n",
  88. " #--- calculating kernel matrix when height = 3.0 ---#\n",
  89. "\n",
  90. " Loading dataset from file...\n",
  91. "\n",
  92. " Calculating kernel matrix, this could take a while...\n",
  93. "\n",
  94. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.8636789321899414 seconds ---\n",
  95. "[[ 20. 10. 4. ..., 20. 20. 20.]\n",
  96. " [ 10. 32. 4. ..., 20. 20. 20.]\n",
  97. " [ 4. 4. 20. ..., 22. 22. 26.]\n",
  98. " ..., \n",
  99. " [ 20. 20. 22. ..., 188. 159. 124.]\n",
  100. " [ 20. 20. 22. ..., 159. 168. 124.]\n",
  101. " [ 20. 20. 26. ..., 124. 124. 202.]]\n",
  102. "\n",
  103. " Saving kernel matrix to file...\n",
  104. "\n",
  105. " Mean performance on train set: 2.270586\n",
  106. "With standard deviation: 0.481516\n",
  107. "\n",
  108. " Mean performance on test set: 11.296110\n",
  109. "With standard deviation: 2.799944\n",
  110. "\n",
  111. "\n",
  112. " #--- calculating kernel matrix when height = 4.0 ---#\n",
  113. "\n",
  114. " Loading dataset from file...\n",
  115. "\n",
  116. " Calculating kernel matrix, this could take a while...\n",
  117. "\n",
  118. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.5077457427978516 seconds ---\n",
  119. "[[ 25. 10. 4. ..., 20. 20. 20.]\n",
  120. " [ 10. 40. 4. ..., 20. 20. 20.]\n",
  121. " [ 4. 4. 25. ..., 22. 22. 26.]\n",
  122. " ..., \n",
  123. " [ 20. 20. 22. ..., 217. 159. 124.]\n",
  124. " [ 20. 20. 22. ..., 159. 183. 124.]\n",
  125. " [ 20. 20. 26. ..., 124. 124. 213.]]\n",
  126. "\n",
  127. " Saving kernel matrix to file...\n",
  128. "\n",
  129. " Mean performance on train set: 1.074035\n",
  130. "With standard deviation: 0.637823\n",
  131. "\n",
  132. " Mean performance on test set: 12.808303\n",
  133. "With standard deviation: 3.446939\n",
  134. "\n",
  135. "\n",
  136. " #--- calculating kernel matrix when height = 5.0 ---#\n",
  137. "\n",
  138. " Loading dataset from file...\n",
  139. "\n",
  140. " Calculating kernel matrix, this could take a while...\n",
  141. "\n",
  142. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.8235607147216797 seconds ---\n",
  143. "[[ 30. 10. 4. ..., 20. 20. 20.]\n",
  144. " [ 10. 48. 4. ..., 20. 20. 20.]\n",
  145. " [ 4. 4. 30. ..., 22. 22. 26.]\n",
  146. " ..., \n",
  147. " [ 20. 20. 22. ..., 246. 159. 124.]\n",
  148. " [ 20. 20. 22. ..., 159. 198. 124.]\n",
  149. " [ 20. 20. 26. ..., 124. 124. 224.]]\n",
  150. "\n",
  151. " Saving kernel matrix to file...\n",
  152. "\n",
  153. " Mean performance on train set: 0.700602\n",
  154. "With standard deviation: 0.572640\n",
  155. "\n",
  156. " Mean performance on test set: 14.017923\n",
  157. "With standard deviation: 3.675042\n",
  158. "\n",
  159. "\n",
  160. " #--- calculating kernel matrix when height = 6.0 ---#\n",
  161. "\n",
  162. " Loading dataset from file...\n",
  163. "\n",
  164. " Calculating kernel matrix, this could take a while...\n",
  165. "\n",
  166. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.458494186401367 seconds ---\n",
  167. "[[ 35. 10. 4. ..., 20. 20. 20.]\n",
  168. " [ 10. 56. 4. ..., 20. 20. 20.]\n",
  169. " [ 4. 4. 35. ..., 22. 22. 26.]\n",
  170. " ..., \n",
  171. " [ 20. 20. 22. ..., 275. 159. 124.]\n",
  172. " [ 20. 20. 22. ..., 159. 213. 124.]\n",
  173. " [ 20. 20. 26. ..., 124. 124. 235.]]\n",
  174. "\n",
  175. " Saving kernel matrix to file...\n",
  176. "\n",
  177. " Mean performance on train set: 0.691515\n",
  178. "With standard deviation: 0.564620\n",
  179. "\n",
  180. " Mean performance on test set: 14.918434\n",
  181. "With standard deviation: 3.805352\n",
  182. "\n",
  183. "\n",
  184. " #--- calculating kernel matrix when height = 7.0 ---#\n",
  185. "\n",
  186. " Loading dataset from file...\n",
  187. "\n",
  188. " Calculating kernel matrix, this could take a while...\n",
  189. "\n",
  190. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.861224889755249 seconds ---\n",
  191. "[[ 40. 10. 4. ..., 20. 20. 20.]\n",
  192. " [ 10. 64. 4. ..., 20. 20. 20.]\n",
  193. " [ 4. 4. 40. ..., 22. 22. 26.]\n",
  194. " ..., \n",
  195. " [ 20. 20. 22. ..., 304. 159. 124.]\n",
  196. " [ 20. 20. 22. ..., 159. 228. 124.]\n",
  197. " [ 20. 20. 26. ..., 124. 124. 246.]]\n",
  198. "\n",
  199. " Saving kernel matrix to file...\n",
  200. "\n",
  201. " Mean performance on train set: 0.691516\n",
  202. "With standard deviation: 0.564620\n",
  203. "\n",
  204. " Mean performance on test set: 15.629476\n",
  205. "With standard deviation: 3.865387\n",
  206. "\n",
  207. "\n",
  208. " #--- calculating kernel matrix when height = 8.0 ---#\n",
  209. "\n",
  210. " Loading dataset from file...\n",
  211. "\n",
  212. " Calculating kernel matrix, this could take a while...\n",
  213. "\n",
  214. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.295838117599487 seconds ---\n",
  215. "[[ 45. 10. 4. ..., 20. 20. 20.]\n",
  216. " [ 10. 72. 4. ..., 20. 20. 20.]\n",
  217. " [ 4. 4. 45. ..., 22. 22. 26.]\n",
  218. " ..., \n",
  219. " [ 20. 20. 22. ..., 333. 159. 124.]\n",
  220. " [ 20. 20. 22. ..., 159. 243. 124.]\n",
  221. " [ 20. 20. 26. ..., 124. 124. 257.]]\n",
  222. "\n",
  223. " Saving kernel matrix to file...\n",
  224. "\n",
  225. " Mean performance on train set: 0.691515\n",
  226. "With standard deviation: 0.564620\n",
  227. "\n",
  228. " Mean performance on test set: 16.214369\n",
  229. "With standard deviation: 3.928756\n",
  230. "\n",
  231. "\n",
  232. " #--- calculating kernel matrix when height = 9.0 ---#\n",
  233. "\n",
  234. " Loading dataset from file...\n",
  235. "\n",
  236. " Calculating kernel matrix, this could take a while...\n",
  237. "\n",
  238. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.008287668228149 seconds ---\n",
  239. "[[ 50. 10. 4. ..., 20. 20. 20.]\n",
  240. " [ 10. 80. 4. ..., 20. 20. 20.]\n",
  241. " [ 4. 4. 50. ..., 22. 22. 26.]\n",
  242. " ..., \n",
  243. " [ 20. 20. 22. ..., 362. 159. 124.]\n",
  244. " [ 20. 20. 22. ..., 159. 258. 124.]\n",
  245. " [ 20. 20. 26. ..., 124. 124. 268.]]\n",
  246. "\n",
  247. " Saving kernel matrix to file...\n",
  248. "\n",
  249. " Mean performance on train set: 0.691515\n",
  250. "With standard deviation: 0.564620\n",
  251. "\n",
  252. " Mean performance on test set: 16.725744\n",
  253. "With standard deviation: 3.993095\n",
  254. "\n",
  255. "\n",
  256. " #--- calculating kernel matrix when height = 10.0 ---#\n",
  257. "\n",
  258. " Loading dataset from file...\n",
  259. "\n",
  260. " Calculating kernel matrix, this could take a while...\n",
  261. "\n",
  262. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 5.347799301147461 seconds ---\n",
  263. "[[ 55. 10. 4. ..., 20. 20. 20.]\n",
  264. " [ 10. 88. 4. ..., 20. 20. 20.]\n",
  265. " [ 4. 4. 55. ..., 22. 22. 26.]\n",
  266. " ..., \n",
  267. " [ 20. 20. 22. ..., 391. 159. 124.]\n",
  268. " [ 20. 20. 22. ..., 159. 273. 124.]\n",
  269. " [ 20. 20. 26. ..., 124. 124. 279.]]\n",
  270. "\n",
  271. " Saving kernel matrix to file...\n",
  272. "\n",
  273. " Mean performance on train set: 0.691516\n",
  274. "With standard deviation: 0.564621\n",
  275. "\n",
  276. " Mean performance on test set: 17.186401\n",
  277. "With standard deviation: 4.056724\n",
  278. "\n",
  279. "\n",
  280. " height RMSE_test std_test RMSE_train std_train k_time\n",
  281. "-------- ----------- ---------- ------------ ----------- --------\n",
  282. " 0 15.6859 4.1392 17.6816 0.713183 0.389796\n",
  283. " 1 7.55046 2.33179 6.27001 0.654734 0.820569\n",
  284. " 2 9.72847 2.05767 4.45068 0.882129 1.37531\n",
  285. " 3 11.2961 2.79994 2.27059 0.481516 1.86368\n",
  286. " 4 12.8083 3.44694 1.07403 0.637823 2.50775\n",
  287. " 5 14.0179 3.67504 0.700602 0.57264 2.82356\n",
  288. " 6 14.9184 3.80535 0.691515 0.56462 3.45849\n",
  289. " 7 15.6295 3.86539 0.691516 0.56462 3.86122\n",
  290. " 8 16.2144 3.92876 0.691515 0.56462 4.29584\n",
  291. " 9 16.7257 3.9931 0.691515 0.56462 5.00829\n",
  292. " 10 17.1864 4.05672 0.691516 0.564621 5.3478\n"
  293. ]
  294. }
  295. ],
  296. "source": [
  297. "# wl subtree kernel\n",
  298. "%load_ext line_profiler\n",
  299. "\n",
  300. "import numpy as np\n",
  301. "import sys\n",
  302. "sys.path.insert(0, \"../\")\n",
  303. "from pygraph.utils.utils import kernel_train_test\n",
  304. "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n",
  305. "\n",
  306. "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
  307. "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n",
  308. "\n",
  309. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type')\n",
  310. "\n",
  311. "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  312. " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n",
  313. "\n",
  314. "# %lprun -f _wl_subtreekernel_do \\\n",
  315. "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  316. "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)"
  317. ]
  318. },
  319. {
  320. "cell_type": "code",
  321. "execution_count": 1,
  322. "metadata": {},
  323. "outputs": [
  324. {
  325. "ename": "ImportError",
  326. "evalue": "cannot import name 'NUMPY_MKL'",
  327. "output_type": "error",
  328. "traceback": [
  329. "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
  330. "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)",
  331. "\u001b[1;32m<ipython-input-1-e7b9d5ef03e3>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minsert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"../\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpygraph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mkernel_train_test\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpygraph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkernels\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mweisfeilerLehmanKernel\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_wl_subtreekernel_do\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
  332. "\u001b[1;32mE:\\课程及课件\\Doctorant\\py-graph\\pygraph\\utils\\utils.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 183\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 184\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mrandom\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 185\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkernel_ridge\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mKernelRidge\u001b[0m \u001b[1;31m# 0.17\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 186\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmean_squared_error\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 187\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msvm\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
  333. "\u001b[1;32md:\\python\\python36\\lib\\site-packages\\sklearn\\__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 132\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 133\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0m__check_build\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 134\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mbase\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mclone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 135\u001b[0m \u001b[0m__check_build\u001b[0m \u001b[1;31m# avoid flakes unused variable error\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
  334. "\u001b[1;32md:\\python\\python36\\lib\\site-packages\\sklearn\\base.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 11\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mscipy\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msparse\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 12\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mexternals\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msix\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfixes\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msignature\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
  335. "\u001b[1;32md:\\python\\python36\\lib\\site-packages\\scipy\\__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[0m__all__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'test'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 61\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_distributor_init\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mNUMPY_MKL\u001b[0m \u001b[1;31m# requires numpy+mkl\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 62\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 63\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mshow_config\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mshow_numpy_config\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
  336. "\u001b[1;31mImportError\u001b[0m: cannot import name 'NUMPY_MKL'"
  337. ]
  338. }
  339. ],
  340. "source": [
  341. "# WL sp kernel\n",
  342. "%load_ext line_profiler\n",
  343. "\n",
  344. "import numpy as np\n",
  345. "import sys\n",
  346. "sys.path.insert(0, \"../\")\n",
  347. "from pygraph.utils.utils import kernel_train_test\n",
  348. "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n",
  349. "\n",
  350. "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
  351. "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n",
  352. "\n",
  353. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'sp')\n",
  354. "\n",
  355. "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  356. " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n",
  357. "\n",
  358. "# %lprun -f _wl_subtreekernel_do \\\n",
  359. "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  360. "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)"
  361. ]
  362. },
  363. {
  364. "cell_type": "code",
  365. "execution_count": null,
  366. "metadata": {},
  367. "outputs": [],
  368. "source": [
  369. "# results\n",
  370. "\n",
  371. "# with y normalization\n",
  372. "# height RMSE_test std_test RMSE_train std_train k_time\n",
  373. "# -------- ----------- ---------- ------------ ----------- --------\n",
  374. "# 0 36.2108 7.33179 38.6059 1.57064 0.379475\n",
  375. "# 1 9.00098 6.37145 6.76379 1.96568 0.844898\n",
  376. "# 2 19.8113 4.04911 5.28757 1.81899 1.35308\n",
  377. "# 3 25.0455 4.94276 2.3274 0.805733 1.81136\n",
  378. "# 4 28.2255 6.5212 0.85156 0.423465 2.23098\n",
  379. "# 5 30.6354 6.73647 3.35947 8.17561 2.71575\n",
  380. "# 6 32.1027 6.85601 3.54105 8.71922 3.11459\n",
  381. "# 7 32.9709 6.89606 6.94372 9.94045 3.55571\n",
  382. "# 8 33.5112 6.90753 6.97339 9.76975 3.79657\n",
  383. "# 9 33.8502 6.91427 11.8345 11.6213 4.41555\n",
  384. "# 10 34.0963 6.93115 11.4257 11.2624 4.94888\n",
  385. "\n",
  386. "# without y normalization\n",
  387. "# height RMSE_test std_test RMSE_train std_train k_time\n",
  388. "# -------- ----------- ---------- ------------ ----------- --------\n",
  389. "# 0 15.6859 4.1392 17.6816 0.713183 0.360443\n",
  390. "# 1 7.55046 2.33179 6.27001 0.654734 0.837389\n",
  391. "# 2 9.72847 2.05767 4.45068 0.882129 1.25317\n",
  392. "# 3 11.2961 2.79994 2.27059 0.481516 1.79971\n",
  393. "# 4 12.8083 3.44694 1.07403 0.637823 2.35346\n",
  394. "# 5 14.0179 3.67504 0.700602 0.57264 2.78285\n",
  395. "# 6 14.9184 3.80535 0.691515 0.56462 3.20764\n",
  396. "# 7 15.6295 3.86539 0.691516 0.56462 3.71648\n",
  397. "# 8 16.2144 3.92876 0.691515 0.56462 3.99213\n",
  398. "# 9 16.7257 3.9931 0.691515 0.56462 4.26315\n",
  399. "# 10 17.1864 4.05672 0.691516 0.564621 5.00918"
  400. ]
  401. },
  402. {
  403. "cell_type": "code",
  404. "execution_count": 3,
  405. "metadata": {
  406. "scrolled": true
  407. },
  408. "outputs": [
  409. {
  410. "name": "stdout",
  411. "output_type": "stream",
  412. "text": [
  413. "\n",
  414. "- This script take as input a kernel matrix\n",
  415. "and returns the classification or regression performance\n",
  416. "- The kernel matrix can be calculated using any of the graph kernels approaches\n",
  417. "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n",
  418. "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n",
  419. "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n",
  420. "provide the corresponding performance on the test set. If more than one split is performed, the final results \n",
  421. "correspond to the average of the performances on the test sets. \n",
  422. "\n",
  423. "@references\n",
  424. " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n",
  425. "\n",
  426. "\n",
  427. "\n",
  428. " #--- calculating kernel matrix when subtree height = 0 ---#\n",
  429. "\n",
  430. " Loading dataset from file...\n",
  431. "\n",
  432. " --- This is a regression problem ---\n",
  433. "\n",
  434. " Calculating kernel matrix, this could take a while...\n",
  435. "\n",
  436. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3920705318450928 seconds ---\n",
  437. "[[ 5. 6. 4. ..., 20. 20. 20.]\n",
  438. " [ 6. 8. 4. ..., 20. 20. 20.]\n",
  439. " [ 4. 4. 5. ..., 21. 21. 21.]\n",
  440. " ..., \n",
  441. " [ 20. 20. 21. ..., 101. 101. 101.]\n",
  442. " [ 20. 20. 21. ..., 101. 101. 101.]\n",
  443. " [ 20. 20. 21. ..., 101. 101. 101.]]\n",
  444. "\n",
  445. " Saving kernel matrix to file...\n",
  446. "\n",
  447. " Mean performance on train set: 17.681582\n",
  448. "With standard deviation: 0.713183\n",
  449. "\n",
  450. " Mean performance on test set: 15.685879\n",
  451. "With standard deviation: 4.139197\n",
  452. "\n",
  453. "\n",
  454. " #--- calculating kernel matrix when subtree height = 1 ---#\n",
  455. "\n",
  456. " Loading dataset from file...\n",
  457. "\n",
  458. " --- This is a regression problem ---\n",
  459. "\n",
  460. " Calculating kernel matrix, this could take a while...\n",
  461. "\n",
  462. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.8578901290893555 seconds ---\n",
  463. "[[ 10. 10. 4. ..., 20. 20. 20.]\n",
  464. " [ 10. 16. 4. ..., 20. 20. 20.]\n",
  465. " [ 4. 4. 10. ..., 22. 22. 24.]\n",
  466. " ..., \n",
  467. " [ 20. 20. 22. ..., 130. 130. 122.]\n",
  468. " [ 20. 20. 22. ..., 130. 130. 122.]\n",
  469. " [ 20. 20. 24. ..., 122. 122. 154.]]\n",
  470. "\n",
  471. " Saving kernel matrix to file...\n",
  472. "\n",
  473. " Mean performance on train set: 6.270014\n",
  474. "With standard deviation: 0.654734\n",
  475. "\n",
  476. " Mean performance on test set: 7.550458\n",
  477. "With standard deviation: 2.331786\n",
  478. "\n",
  479. "\n",
  480. " #--- calculating kernel matrix when subtree height = 2 ---#\n",
  481. "\n",
  482. " Loading dataset from file...\n",
  483. "\n",
  484. " --- This is a regression problem ---\n",
  485. "\n",
  486. " Calculating kernel matrix, this could take a while...\n",
  487. "\n",
  488. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.264050006866455 seconds ---\n",
  489. "[[ 15. 10. 4. ..., 20. 20. 20.]\n",
  490. " [ 10. 24. 4. ..., 20. 20. 20.]\n",
  491. " [ 4. 4. 15. ..., 22. 22. 26.]\n",
  492. " ..., \n",
  493. " [ 20. 20. 22. ..., 159. 151. 124.]\n",
  494. " [ 20. 20. 22. ..., 151. 153. 124.]\n",
  495. " [ 20. 20. 26. ..., 124. 124. 185.]]\n",
  496. "\n",
  497. " Saving kernel matrix to file...\n",
  498. "\n",
  499. " Mean performance on train set: 4.450682\n",
  500. "With standard deviation: 0.882129\n",
  501. "\n",
  502. " Mean performance on test set: 9.728466\n",
  503. "With standard deviation: 2.057669\n",
  504. "\n",
  505. "\n",
  506. " #--- calculating kernel matrix when subtree height = 3 ---#\n",
  507. "\n",
  508. " Loading dataset from file...\n",
  509. "\n",
  510. " --- This is a regression problem ---\n",
  511. "\n",
  512. " Calculating kernel matrix, this could take a while...\n",
  513. "\n",
  514. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.731236219406128 seconds ---\n",
  515. "[[ 20. 10. 4. ..., 20. 20. 20.]\n",
  516. " [ 10. 32. 4. ..., 20. 20. 20.]\n",
  517. " [ 4. 4. 20. ..., 22. 22. 26.]\n",
  518. " ..., \n",
  519. " [ 20. 20. 22. ..., 188. 159. 124.]\n",
  520. " [ 20. 20. 22. ..., 159. 168. 124.]\n",
  521. " [ 20. 20. 26. ..., 124. 124. 202.]]\n",
  522. "\n",
  523. " Saving kernel matrix to file...\n",
  524. "\n",
  525. " Mean performance on train set: 2.270586\n",
  526. "With standard deviation: 0.481516\n",
  527. "\n",
  528. " Mean performance on test set: 11.296110\n",
  529. "With standard deviation: 2.799944\n",
  530. "\n",
  531. "\n",
  532. " #--- calculating kernel matrix when subtree height = 4 ---#\n",
  533. "\n",
  534. " Loading dataset from file...\n",
  535. "\n",
  536. " --- This is a regression problem ---\n",
  537. "\n",
  538. " Calculating kernel matrix, this could take a while...\n",
  539. "\n",
  540. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.1112847328186035 seconds ---\n",
  541. "[[ 25. 10. 4. ..., 20. 20. 20.]\n",
  542. " [ 10. 40. 4. ..., 20. 20. 20.]\n",
  543. " [ 4. 4. 25. ..., 22. 22. 26.]\n",
  544. " ..., \n",
  545. " [ 20. 20. 22. ..., 217. 159. 124.]\n",
  546. " [ 20. 20. 22. ..., 159. 183. 124.]\n",
  547. " [ 20. 20. 26. ..., 124. 124. 213.]]\n",
  548. "\n",
  549. " Saving kernel matrix to file...\n",
  550. "\n",
  551. " Mean performance on train set: 1.074035\n",
  552. "With standard deviation: 0.637823\n",
  553. "\n",
  554. " Mean performance on test set: 12.808303\n",
  555. "With standard deviation: 3.446939\n",
  556. "\n",
  557. "\n",
  558. " #--- calculating kernel matrix when subtree height = 5 ---#\n",
  559. "\n",
  560. " Loading dataset from file...\n",
  561. "\n",
  562. " --- This is a regression problem ---\n",
  563. "\n",
  564. " Calculating kernel matrix, this could take a while...\n",
  565. "\n",
  566. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.4751319885253906 seconds ---\n",
  567. "[[ 30. 10. 4. ..., 20. 20. 20.]\n",
  568. " [ 10. 48. 4. ..., 20. 20. 20.]\n",
  569. " [ 4. 4. 30. ..., 22. 22. 26.]\n",
  570. " ..., \n",
  571. " [ 20. 20. 22. ..., 246. 159. 124.]\n",
  572. " [ 20. 20. 22. ..., 159. 198. 124.]\n",
  573. " [ 20. 20. 26. ..., 124. 124. 224.]]\n",
  574. "\n",
  575. " Saving kernel matrix to file...\n",
  576. "\n",
  577. " Mean performance on train set: 0.700602\n",
  578. "With standard deviation: 0.572640\n",
  579. "\n",
  580. " Mean performance on test set: 14.017923\n",
  581. "With standard deviation: 3.675042\n",
  582. "\n",
  583. "\n",
  584. " #--- calculating kernel matrix when subtree height = 6 ---#\n",
  585. "\n",
  586. " Loading dataset from file...\n",
  587. "\n",
  588. " --- This is a regression problem ---\n",
  589. "\n",
  590. " Calculating kernel matrix, this could take a while...\n",
  591. "\n",
  592. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.8712213039398193 seconds ---\n",
  593. "[[ 35. 10. 4. ..., 20. 20. 20.]\n",
  594. " [ 10. 56. 4. ..., 20. 20. 20.]\n",
  595. " [ 4. 4. 35. ..., 22. 22. 26.]\n",
  596. " ..., \n",
  597. " [ 20. 20. 22. ..., 275. 159. 124.]\n",
  598. " [ 20. 20. 22. ..., 159. 213. 124.]\n",
  599. " [ 20. 20. 26. ..., 124. 124. 235.]]\n",
  600. "\n",
  601. " Saving kernel matrix to file...\n",
  602. "\n",
  603. " Mean performance on train set: 0.691515\n",
  604. "With standard deviation: 0.564620\n",
  605. "\n",
  606. " Mean performance on test set: 14.918434\n",
  607. "With standard deviation: 3.805352\n",
  608. "\n",
  609. "\n",
  610. " #--- calculating kernel matrix when subtree height = 7 ---#\n",
  611. "\n",
  612. " Loading dataset from file...\n",
  613. "\n",
  614. " --- This is a regression problem ---\n",
  615. "\n",
  616. " Calculating kernel matrix, this could take a while...\n",
  617. "\n",
  618. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.554422378540039 seconds ---\n",
  619. "[[ 40. 10. 4. ..., 20. 20. 20.]\n",
  620. " [ 10. 64. 4. ..., 20. 20. 20.]\n",
  621. " [ 4. 4. 40. ..., 22. 22. 26.]\n",
  622. " ..., \n",
  623. " [ 20. 20. 22. ..., 304. 159. 124.]\n",
  624. " [ 20. 20. 22. ..., 159. 228. 124.]\n",
  625. " [ 20. 20. 26. ..., 124. 124. 246.]]\n",
  626. "\n",
  627. " Saving kernel matrix to file...\n",
  628. "\n",
  629. " Mean performance on train set: 0.691516\n",
  630. "With standard deviation: 0.564620\n",
  631. "\n",
  632. " Mean performance on test set: 15.629476\n",
  633. "With standard deviation: 3.865387\n",
  634. "\n",
  635. "\n",
  636. " #--- calculating kernel matrix when subtree height = 8 ---#\n",
  637. "\n",
  638. " Loading dataset from file...\n",
  639. "\n",
  640. " --- This is a regression problem ---\n",
  641. "\n",
  642. " Calculating kernel matrix, this could take a while...\n",
  643. "\n",
  644. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.8757314682006836 seconds ---\n",
  645. "[[ 45. 10. 4. ..., 20. 20. 20.]\n",
  646. " [ 10. 72. 4. ..., 20. 20. 20.]\n",
  647. " [ 4. 4. 45. ..., 22. 22. 26.]\n",
  648. " ..., \n",
  649. " [ 20. 20. 22. ..., 333. 159. 124.]\n",
  650. " [ 20. 20. 22. ..., 159. 243. 124.]\n",
  651. " [ 20. 20. 26. ..., 124. 124. 257.]]\n",
  652. "\n",
  653. " Saving kernel matrix to file...\n",
  654. "\n",
  655. " Mean performance on train set: 0.691515\n",
  656. "With standard deviation: 0.564620\n",
  657. "\n",
  658. " Mean performance on test set: 16.214369\n",
  659. "With standard deviation: 3.928756\n",
  660. "\n",
  661. "\n",
  662. " #--- calculating kernel matrix when subtree height = 9 ---#\n",
  663. "\n",
  664. " Loading dataset from file...\n",
  665. "\n",
  666. " --- This is a regression problem ---\n",
  667. "\n",
  668. " Calculating kernel matrix, this could take a while...\n",
  669. "\n",
  670. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.205373764038086 seconds ---\n",
  671. "[[ 50. 10. 4. ..., 20. 20. 20.]\n",
  672. " [ 10. 80. 4. ..., 20. 20. 20.]\n",
  673. " [ 4. 4. 50. ..., 22. 22. 26.]\n",
  674. " ..., \n",
  675. " [ 20. 20. 22. ..., 362. 159. 124.]\n",
  676. " [ 20. 20. 22. ..., 159. 258. 124.]\n",
  677. " [ 20. 20. 26. ..., 124. 124. 268.]]\n",
  678. "\n",
  679. " Saving kernel matrix to file...\n",
  680. "\n",
  681. " Mean performance on train set: 0.691515\n",
  682. "With standard deviation: 0.564620\n",
  683. "\n",
  684. " Mean performance on test set: 16.725744\n",
  685. "With standard deviation: 3.993095\n",
  686. "\n",
  687. "\n",
  688. " #--- calculating kernel matrix when subtree height = 10 ---#\n",
  689. "\n",
  690. " Loading dataset from file...\n",
  691. "\n",
  692. " --- This is a regression problem ---\n",
  693. "\n",
  694. " Calculating kernel matrix, this could take a while...\n"
  695. ]
  696. },
  697. {
  698. "name": "stdout",
  699. "output_type": "stream",
  700. "text": [
  701. "\n",
  702. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.737298250198364 seconds ---\n",
  703. "[[ 55. 10. 4. ..., 20. 20. 20.]\n",
  704. " [ 10. 88. 4. ..., 20. 20. 20.]\n",
  705. " [ 4. 4. 55. ..., 22. 22. 26.]\n",
  706. " ..., \n",
  707. " [ 20. 20. 22. ..., 391. 159. 124.]\n",
  708. " [ 20. 20. 22. ..., 159. 273. 124.]\n",
  709. " [ 20. 20. 26. ..., 124. 124. 279.]]\n",
  710. "\n",
  711. " Saving kernel matrix to file...\n",
  712. "\n",
  713. " Mean performance on train set: 0.691516\n",
  714. "With standard deviation: 0.564621\n",
  715. "\n",
  716. " Mean performance on test set: 17.186401\n",
  717. "With standard deviation: 4.056724\n",
  718. "\n",
  719. "\n",
  720. " height RMSE_test std_test RMSE_train std_train k_time\n",
  721. "-------- ----------- ---------- ------------ ----------- --------\n",
  722. " 0 15.6859 4.1392 17.6816 0.713183 0.392071\n",
  723. " 1 7.55046 2.33179 6.27001 0.654734 0.85789\n",
  724. " 2 9.72847 2.05767 4.45068 0.882129 1.26405\n",
  725. " 3 11.2961 2.79994 2.27059 0.481516 1.73124\n",
  726. " 4 12.8083 3.44694 1.07403 0.637823 2.11128\n",
  727. " 5 14.0179 3.67504 0.700602 0.57264 2.47513\n",
  728. " 6 14.9184 3.80535 0.691515 0.56462 2.87122\n",
  729. " 7 15.6295 3.86539 0.691516 0.56462 3.55442\n",
  730. " 8 16.2144 3.92876 0.691515 0.56462 3.87573\n",
  731. " 9 16.7257 3.9931 0.691515 0.56462 4.20537\n",
  732. " 10 17.1864 4.05672 0.691516 0.564621 4.7373\n"
  733. ]
  734. }
  735. ],
  736. "source": [
  737. "# test of WL subtree kernel\n",
  738. "\n",
  739. "\"\"\"\n",
  740. "- This script takes as input a kernel matrix\n",
  741. "and returns the classification or regression performance\n",
  742. "- The kernel matrix can be calculated using any of the graph kernels approaches\n",
  743. "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n",
  744. "- For prediction we divide the data into training, validation and test sets. For each split, we first train on the train data, \n",
  745. "then evaluate the performance on the validation set. We choose the optimal parameters on the validation set and finally\n",
  746. "provide the corresponding performance on the test set. If more than one split is performed, the final results \n",
  747. "correspond to the average of the performances on the test sets. \n",
  748. "\n",
  749. "@references\n",
  750. " Elisabetta Ghisu, https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n",
  751. "\"\"\"\n",
  752. "\n",
  753. "print(__doc__)\n",
  754. "\n",
  755. "import sys\n",
  756. "import os\n",
  757. "import pathlib\n",
  758. "from collections import OrderedDict\n",
  759. "sys.path.insert(0, \"../\")\n",
  760. "from tabulate import tabulate\n",
  761. "\n",
  762. "import numpy as np\n",
  763. "import matplotlib.pyplot as plt\n",
  764. "\n",
  765. "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n",
  766. "from pygraph.utils.graphfiles import loadDataset\n",
  767. "from pygraph.utils.utils import split_train_test\n",
  768. "\n",
  769. "train_means_list = []\n",
  770. "train_stds_list = []\n",
  771. "test_means_list = []\n",
  772. "test_stds_list = []\n",
  773. "kernel_time_list = []\n",
  774. "\n",
  775. "for height in np.linspace(0, 10, 11):\n",
  776. " print('\\n\\n #--- calculating kernel matrix when subtree height = %d ---#' % height)\n",
  777. "\n",
  778. " print('\\n Loading dataset from file...')\n",
  779. " dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
  780. " y = np.array(y)\n",
  781. "# print(y)\n",
  782. "\n",
  783. " # setup the parameters\n",
  784. " model_type = 'regression' # Regression or classification problem\n",
  785. " print('\\n --- This is a %s problem ---' % model_type)\n",
  786. "\n",
  787. "# datasize = len(dataset)\n",
  788. " trials = 100 # Trials for hyperparameters random search\n",
  789. " splits = 10 # Number of splits of the data\n",
  790. " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n",
  791. " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n",
  792. "\n",
  793. "\n",
  794. " # set the output path\n",
  795. " kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n",
  796. " if not os.path.exists(kernel_file_path):\n",
  797. " os.makedirs(kernel_file_path)\n",
  798. "\n",
  799. " \"\"\"\n",
  800. " - Here starts the main program\n",
  801. " - First we permute the data, then for each split we evaluate corresponding performances\n",
  802. " - In the end, the performances are averaged over the test sets\n",
  803. " \"\"\"\n",
  804. "\n",
  805. " # save kernel matrices to files / read kernel matrices from files\n",
  806. " kernel_file = kernel_file_path + 'km.ds'\n",
  807. " path = pathlib.Path(kernel_file)\n",
  808. " # get train set kernel matrix\n",
  809. " if path.is_file():\n",
  810. " print('\\n Loading the kernel matrix from file...')\n",
  811. " Kmatrix = np.loadtxt(kernel_file)# results\n",
  812. " print(Kmatrix)\n",
  813. " else:\n",
  814. " print('\\n Calculating kernel matrix, this could take a while...')\n",
  815. " Kmatrix, run_time = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height))\n",
  816. " kernel_time_list.append(run_time)\n",
  817. " print(Kmatrix)\n",
  818. " print('\\n Saving kernel matrix to file...')\n",
  819. " # np.savetxt(kernel_file, Kmatrix)\n",
  820. "\n",
  821. " train_mean, train_std, test_mean, test_std = \\\n",
  822. " split_train_test(Kmatrix, y, alpha_grid, C_grid, splits, trials, model_type, normalize = False)\n",
  823. " \n",
  824. " train_means_list.append(train_mean)\n",
  825. " train_stds_list.append(train_std)\n",
  826. " test_means_list.append(test_mean)\n",
  827. " test_stds_list.append(test_std)\n",
  828. " \n",
  829. "print('\\n') \n",
  830. "table_dict = {'height': np.linspace(0, 10, 11), 'RMSE_test': test_means_list, 'std_test': test_stds_list, \\\n",
  831. " 'RMSE_train': train_means_list, 'std_train': train_stds_list, 'k_time': kernel_time_list}\n",
  832. "keyorder = ['height', 'RMSE_test', 'std_test', 'RMSE_train', 'std_train', 'k_time']\n",
  833. "print(tabulate(OrderedDict(sorted(table_dict.items(), key = lambda i:keyorder.index(i[0]))), headers='keys'))"
  834. ]
  835. },
  836. {
  837. "cell_type": "code",
  838. "execution_count": 8,
  839. "metadata": {
  840. "scrolled": true
  841. },
  842. "outputs": [
  843. {
  844. "name": "stdout",
  845. "output_type": "stream",
  846. "text": [
  847. "{'O', 'C'}\n",
  848. "{'O', 'C'}\n",
  849. "--- shortest path kernel built in 0.0002582073211669922 seconds ---\n",
  850. "3\n"
  851. ]
  852. },
  853. {
  854. "data": {
  855. "image/png": "\n",
  856. "text/plain": [
  857. "<matplotlib.figure.Figure at 0x7f773eab40b8>"
  858. ]
  859. },
  860. "metadata": {},
  861. "output_type": "display_data"
  862. },
  863. {
  864. "name": "stdout",
  865. "output_type": "stream",
  866. "text": [
  867. "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'O'})]\n",
  868. " -> \n"
  869. ]
  870. },
  871. {
  872. "data": {
  873. "image/png": "\n",
  874. "text/plain": [
  875. "<matplotlib.figure.Figure at 0x7f773ca1cc88>"
  876. ]
  877. },
  878. "metadata": {},
  879. "output_type": "display_data"
  880. },
  881. {
  882. "name": "stdout",
  883. "output_type": "stream",
  884. "text": [
  885. "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CO'}), (3, {'label': 'CCCO'}), (4, {'label': 'OCC'})]\n",
  886. " -> \n"
  887. ]
  888. },
  889. {
  890. "data": {
  891. "image/png": "\n",
  892. "text/plain": [
  893. "<matplotlib.figure.Figure at 0x7f773c9a44e0>"
  894. ]
  895. },
  896. "metadata": {},
  897. "output_type": "display_data"
  898. },
  899. {
  900. "name": "stdout",
  901. "output_type": "stream",
  902. "text": [
  903. "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '3'}), (3, {'label': '1'}), (4, {'label': '2'})]\n"
  904. ]
  905. },
  906. {
  907. "data": {
  908. "image/png": "\n",
  909. "text/plain": [
  910. "<matplotlib.figure.Figure at 0x7f773c9957b8>"
  911. ]
  912. },
  913. "metadata": {},
  914. "output_type": "display_data"
  915. },
  916. {
  917. "name": "stdout",
  918. "output_type": "stream",
  919. "text": [
  920. "[(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'C'}), (3, {'label': 'C'}), (4, {'label': 'C'}), (5, {'label': 'C'}), (6, {'label': 'O'})]\n",
  921. " -> \n"
  922. ]
  923. },
  924. {
  925. "data": {
  926. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD8CAYAAACfF6SlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xt8VPWd//HXh1tIAJGG+02wgi0WHrgGW9Zqcb2BpSJspXTRiq2lpa7bfWxBRa13C953f72AaBVRXLxQA90lIFrU1kUJKIJSgQCKICWAEKEESODz++NMcEhmkkxmJpOZeT8fj3lkzjnfc84nJ5PPOfM93/P9mrsjIiLZpVmqAxARkcan5C8ikoWU/EVEspCSv4hIFlLyFxHJQkr+IiJZSMlfRCQLKfmLiGQhJX8RkSzUIhEbMbPhwH8BzYHH3X16hDJjgTsAB95z93+pbZsdO3b0Pn36JCI8EZGssWrVqt3u3qmucnEnfzNrDvwWuAjYBhSb2UJ3XxdWph8wFTjH3feaWee6ttunTx9WrlwZb3giIlnFzD6uT7lEVPucDZS4+2Z3PwLMA0ZVK/Nj4LfuvhfA3UsTsF8REWmgRCT/HsAnYdPbQvPC9Qf6m9mbZvZWqJpIRERSJCF1/vXcTz9gGNATeMPMBrr7vvBCZjYRmAjQu3fvRgpNRCT7JOLKfzvQK2y6Z2heuG3AQnevcPctwAaCk8EJ3H2Wuxe4e0GnTnXerxARkQZKRPIvBvqZWV8zawWMAxZWK1NIcNWPmXUkqAbanIB9i4hIA8Sd/N29EvhXYAnwV+B5d//AzO4ys8tCxZYAe8xsHbAMmOLue+Ldt4iINIw11ZG8CgoKXE09RURiY2ar3L2grnJ6wldEJAs1VmsfEZHkKS2F2bNhzRooK4P27WHQILjmGlDjkYiU/EUkfRUXw7RpUFQUTB869MWyP/wBbr8dRoyAqVNhyJDUxNhEqdpHRNLTjBkwbBgUFgZJPzzxA5SXB/MKC4NyM2akIsomS1f+IpJ+ZsyAyZPh4MG6y7oH5SZPDqYnTUpubGlCV/4ikl6Ki2sk/t8ABUAOMCHaelUnALUiBJT8RSTdTJsWVOmE6Q7cCvywrnXLy4P1RclfRNJIaWlwc7fa80ljgMuB/LrWd4dFi2DXriQFmD6U/EUkfcyeHf82zBKznTSn5C8i6WPNmpqtemJVXg5r1yYmnjSm5C8i6aOsLDHb2bs3MdtJY0r+IpI+2rdPzHY6dEjMdtKYkr+IpI9Bg6B16xqzK4FDwNHQ61BoXkS5uTBwYLIiTBtK/iKSPiZMiDj7HiAXmA48E3p/T7RtuEfdTjZR8heR9NG5c9BXj9kJs+8AvNrrjkjrm8Gll6qzN5T8RSTdTJ0aVN00RG5usL4o+YtImhkyBB58EPLyYlsvLy9Yr6DOcU6ygpK/iKSfSZOOnwCO1VXW7IvEX61Tt2effZaCggLatm1Lt27dGDFiBH/5y1+SFnZTouQvIulp0iR2vfgi/9OyJZ6TU7MqKDc3aBk0ejS8/nqNxP/www/z7//+79x8883s3LmTrVu38rOf/YwFCxY04i+ROhrDV0TS1t1338327duZeffdQZcNa9cGD3B16BA055wwIeLN3bKyMnr06MGTTz7JFVdc0ehxJ1N9x/BVf/4ikpYqKyt57LHHWLhwYZDgp0yp97rLly/n0KFDjB49OokRNm2q9hGRtLRo0SJ69OjB4MGDY153z549dOzYkRYtsvf6V8lfRNLSzJkz+elPf9qgdfPz89m9ezeVlVGfA854Sv4ikna2bNnCihUrGDt2bIPWHzp0KDk5ORQWFiY4svSh5C8iaWfWrFn84Ac/ILeBD3u1b9+eu+66i+uuu47CwkIOHjxIRUUFRUVF3HDDDQmOtmnK3govEUlLhw8f5oknnuCNN96Iazu/+MUv6Nq1K/fccw/jx4+nXbt2nHX
WWdxyyy0JirRpS0jyN7PhwH8BzYHH3X16lHL/DLwIDHF3teMUkZi99NJLfO1rX+P000+Pe1vjx49n/PjxCYgq/cRd7WNmzYHfAiOAAcD3zWxAhHLtgJ8Db8e7TxHJXjNmzGBStQe2JHaJqPM/Gyhx983ufgSYB4yKUO5u4D6CrrZFRGK2bt06NmzYwKhRkVKMxCIRyb8H8EnY9LbQvOPM7B+AXu7+vwnYn4hkqZkzZ3LttdfSsmXLVIeS9pJ+w9fMmgEPAxPqUXYiMBGgd+/eyQ1MRNLK3//+d+bOncu7776b6lAyQiKu/LcDvcKme4bmVWkHfA14zcw+Ar4BLDSzGn1PuPssdy9w94JOGmxBJKnSrUfLefPmcc455+jCMEESkfyLgX5m1tfMWgHjgIVVC929zN07unsfd+8DvAVcptY+IqmTjj1axvNEr9QUd7WPu1ea2b8CSwiaej7h7h+Y2V3ASndfWPsWRKQxlZWVcdttt/Hkk08yZsyY4/O/853v8J3vfCeFkUW3cuVKdu/ezSWXXJLqUDJGQur83X0RsKjavNuilB2WiH2KSMOkY4+WM2fOZOLEiTRv3jzVoWQMPeErkmXSrUfLffv2MX/+fNavX5/qUDKK+vYRyTLp1qPlnDlzGD58OJ07d051KBlFyV8ky6RTj5burhu9SaLkL5Jl0qlHy6rO284777wUR5J50qPST0QSKl16tKy66jezVIeScTSAu4g0STt37uQrX/kKW7Zs4eSTT051OGmjvgO4q9pHRJqkqucQlPiTQ9U+ItLkHD16lEcffZQXXngh1aFkLF35i0iT8/LLL5Ofn09BQZ21F9JASv4i0uRowJbkU/IXkSZl69atvPnmm4wbNy7VoWQ0JX8RaVIee+wxxo8fT5s2bVIdSkbTDV8RaTIqKir4/e9/z9KlS1MdSsbTlb+INBkLFiygX79+nHHGGakOJeMp+YtIk6F+fBqPkr+INAkbNmxg7dq1JwwwI8mj5C8iTcKjjz7KNddcQ05OTqpDyQq64SsiKVdeXs6cOXNYsWJFqkPJGrryF5GUe+GFFxgyZAh9+/ZNdShZQ8lfRFJuxowZutHbyJT8RSSlVq9ezfbt2/n2t7+d6lCyipK/iKTUzJkz+fGPf0zz5s1THUpW0Q1fEUmZ/fv389xzz7Fu3bpUh5J1dOUvIinzzDPPcMEFF9CtW7dUh5J1lPxFJCXcXV03p5CSv4ikxPLlyzl06BDnn39+qkPJSglJ/mY23MzWm1mJmd0UYfl/mNk6M1tjZq+a2SmJ2K+IpK+q5p3NmukaNBXiPupm1hz4LTACGAB838wGVCv2LlDg7oOAF4H7492viKSv3bt388c//pGrr7461aFkrUSccs8GStx9s7sfAeYBo8ILuPsydz8YmnwL6JmA/YpImpo9ezajRo0iPz8/1aFkrUQ09ewBfBI2vQ34ei3lfwQUJWC/IpKGjh07xqOPPsqcOXNSHUpWa9R2/mZ2JVAAfCvK8onARIDevXs3YmQi0lheffVV2rRpwze+8Y1Uh5LVElHtsx3oFTbdMzTvBGZ2IXALcJm7H460IXef5e4F7l7QqVOnBIQmIk1N1YAtZpbqULJaIpJ/MdDPzPqaWStgHLAwvICZnQk8SpD4SxOwTxFJQ9u3b2fZsmWMHz8+1aFkvbirfdy90sz+FVgCNAeecPcPzOwuYKW7LwQeANoCL4TO9lvd/bJ49y0iTVhpKcyeDWvWQFkZtG/PutJSfnTZZbRr1y7V0WW9hNT5u/siYFG1ebeFvb8wEfsRkTRQXAzTpkFRqF3HoUPHF30TuKBVK/j8c5g6FYYMSU2Moid8RSSBZsyAYcOgsDBI+mGJHyAXaHbkSLB82LCgvKSEevUUkcSYMQMmT4aDB+su6x6Umzw5mFb/Po1OV/4iEr/i4qiJfyPQGrgy0npVJ4CVK5McoFSn5C8i8Zs2DcrLIy66Dqi1Zr+8PFhfGpWSv4jEp7Q0uLnrXmP
RPOBk4ILa1neHRYtg164kBSiRKPmLSHxmz444+3PgNuDh+mzDLOp2JDmU/EUkPmvW1GjVA/BLgo686tWLY3k5rF2b4MCkNmrtIyLxKSurMWs18ApBX+71tndvggKS+lDyF5H4tG9fY9ZrwEdAVfeMB4CjwDrgnSibOdq+Pc0TH51EoWofEYnPoEF469YnzJoIbCL4BrAa+CnwbYI+YCI5ZMYdL77ImDFjeOyxx9i2bVsyIxaU/EUkDu5OUZcuHDl8Yke9eUDXsFdbgrb+0frqbZ2Tw7+98w6jR49m2bJlDB48mIEDB3LDDTewbNkyjhw5ksxfIyuZR2ie1RQUFBT4Sj34IdJkFRcXM3nyZPbs2cMrJ51El7fewhqST8xg9GiYP//4rKNHj1JcXExRURFFRUVs2LCB888/nxEjRjBixAh69epVywazm5mtcveCOssp+YtILLZs2cLNN9/MG2+8wZ133smECRNo8e67QV899enaobq8PHj9dSiInq927drFkiVLKCoq4uWXX6Zz587HTwTnnnsurVq1avgvlGHqm/xV7SMi9bJ3714mT55MQUEBX/3qV9mwYQPXXnstLVq0CHrnfPDBIJHHIi8vWK+WxA/QqVMnrrzySubOncvf/vY3nnjiCdq2bcvNN99Mp06dGDVqFDNnzuTjjz+O4zfMLkr+IlKrw4cP8/DDD3P66aezf/9+PvjgA2677TbatGlzYsFJk744AdQ1SpfZF4k/xk7dmjdvzte//nXuuOMO3n77bUpKShg7dixvvvkmQ4YMYcCAAfziF7/glVde4fDhiIMGCqr2EZEo3J3nn3+eqVOncsYZZ3DfffcxYMCAuldcuTLoq2fRoiDJh/f5k5sbdOdw6aVBf/51XPHH6tixY6xater4vYJ169bxrW9963gVUZ8+fRK6v6ZIdf4i0mB//vOfmTx5MpWVlTz44IOcf/75sW9k166gy4a1a4MHuDp0gIEDYcIEaKQxuvfs2cPLL79MUVERixcvJj8///iJ4LzzziMnJyfxO40wghmDBsE11zTK763kLyIxW79+PTfeeCOrV6/m3nvv5fvf/z7NmmVG7fCxY8d45513jn8reP/990/4VtC3b9/4dlDLCGbHv/GMGJH0EcyU/EWk3kpLS7nzzjt5/vnnueGGG7j++utpXe3BrUyzZ88eli5devxbQYcOHU74VhDT7181kE15ecTeTY8zC04EDbjXUV9q7SMJ8eyzz1JQUEDbtm3p1q0bI0aM4C9/+Uuqw5IEOXjwIPfeey8DBgygZcuWfPjhh0yZMiXjEz9Afn4+48aN46mnnmLHjh3MnTuX/Px87rzzTjp37szIkSP57W9/y+bNm2vfUPgIZqHE/xkwGmgDnAI8W1U2fASzVA9h6e5N8nXWWWe5pNZDDz3knTp18vnz5/uBAwf8yJEjvnDhQp88eXKqQ5M4VVZW+pNPPuk9e/b0K664wktKSlIdUpOyZ88enzdvnl999dXepUsX79+/v//85z/3xYsXe3l5+RcFV6xwz8tzD9L68dc48LHg+8H/DH4S+PvVynhenntxccJjB1Z6PXKsqn0g5TdomqKysjJ69OjBk08+yRVXXJHqcCSBXn75ZaZMmULbtm158MEHGTp0aKpDatKOHTvG6tWrj98rWLNmDeeeey4jRoxgwsKFtH3llROqev4OdADeB/qH5l0F9ACmh284wpPNiVDfap+UX+FHezXKlf+KFe6jR7u3bh28ws/KubnBvNGjg3JZpqioyJs3b+4VFRWpDkUS5L333vOLL77YTzvtNJ8/f74fO3Ys1SGlpc8++8yfe+45v/573/Py6lfz4O+A51ab9wD4yAhlvXVr99LShMZHPa/8s7fOf8aM4HH0wsLgrnz1wSjKy4N5hYVBuVTXzzWyPXv20LFjx+DpTUlr27dv54c//CEXXXQRI0eO5IMPPmDMmDFYXQ9iSUQdOnRg7Nix/L9/+AdyItwbOQCcVG1ee2B
/pI2lcASz7Ez+EW7QRNWUbtA0ovz8fHbv3k1lZWWqQ5EG2r9/P7feeiuDBg2iS5cubNiwgeuvv1794CTKmjVYhBHM2hIMYRnuc6BdpG2kcASz7Ev+xcVfJP4wVwLdCM7Y/YHHq69XdQLIkuanQ4cOJScnh8LCwlSHIjGqqKhgxowZ9O/fn61bt/Luu+8ybdo02kcYdEXiEGEEMwjyRyWwMWzee8AZ0baTohHMEpL8zWy4ma03sxIzuynC8hwzey60/G0z65OI/TbItGknPm4eMpVg5KHPgYXArcCq6oXKy4P1s0D79u256667uO666ygsLOTgwYNUVFRQVFTEDTfckOrwJAJ3Z8GCBQwcOJD58+ezaNEi5syZQ+/eveteWWIX5WTaBhhDMHj934E3gQUEN30j6tAhCcHVQ31uDNT2ApoTDNpzKtCK4CQ3oFqZnwEzQ+/HAc/Vtd2k3PDdubPmjd0Irw/Bu4I/10g3aJqyZ555xs866yzPy8vzLl26+KWXXupvvvlmqsOSalasWOHnnXeen3HGGb5o0SLdzG0M990XNZ/sAR8FngfeC3xutHyTm+t+//0JDYt63vBNRPIfCiwJm54KTK1WZgkwNPS+BbCb0NPF0V5JSf61/LEcfFLoLj3gZ4ba6DbGHyuldu4Mjsv48e4jRwY/77svq05w6Wzz5s0+btw47969uz/22GNqndWY6nkxWesrzVv79AA+CZveFpoXsYy7VwJlQH4C9h2bNWtqtuoJ8zuCO/J/JvjaFrHLpxTeoEmo4mIYMwZOOQVuvx3mzoX/+Z/g5x13QO/ewfLi4lRHKhHU2re+NI7OnYO+ehraasos6N00Rc8SNakbvmY20cxWmtnKXbt2JX4HUW7QhGsOfJPgDBatbc++jz5i586dHDt2LIHBNSI1c01b4X3rHzhwIHrf+tI4pk4N+uppiNzcYP0UScRlwnYgfEDNnqF5kcpsM7MWBM1e91TfkLvPAmZB8IRvAmI7UQytHSoJbmRE8pf33+ear32N/fv306NHD3r37k2vXr1OeFXNa9++fdNqTx3ezLUu4c1cIWkdUUnd3E/sW/+1116rX9/6klxVI5jV93+qSj1HMEumRCT/YqCfmfUlSPLjgH+pVmYhcDWwHPgu8KdQ3VTjGjQoeJS62pVuKfAnYCSQC7wC/HfoVUNuLiOnTmXXlCkcPHiQbdu28cknnxx/vfPOOxQWFh6fdvcaJ4Tw6Z49e5IX69B3DRWlmSvAPOBOYCvQFZgNnFu1sOoEMGRISj+s2aqqb/2jR4/yxBNPMGzYsFSHJOGqLoqaSK+e9ZWQvn3M7FLgPwlqTZ5w93vN7C6CGw8Lzaw18DRwJkGHd+Pcvdau8pLSt09paVDHXS357yI4I70HHCPohe/fgB9H2kbr1rB1a73r6crKyti6desJJ4jw6W3bttG2bdtaTxDdu3enZcuW8fzmgTFjgqqcan/zpcC1wHPA2cCO0PwTbtwkqR8SiS68b/1f/epXjBs3LmP61s9IKRzBLJz6848mSgKslyQkQHdn165dtZ4gdu7cSadOnaKeIHr16kWXLl1qTwxRTnwA/wj8KPSqVYwnPmmYbOxbP6OkeASz+ib/7GsaMHUqLFkSW/1clSTcoDEzOnfuTOfOnSmIcjVQWVnJp59+esLJYfPmzbz22mvHp6t64Yx036FXr170e+klcoHqdx+OAiuBy4DTgEPA5cADBFVg1YINPtRTpiTwCDRdzz77LA8//DAffvgh7dq1Y/Dgwdxyyy1885vfTMr+Dh48yCOPPMIjjzzCVVddxYcffkh+fuM3ipM4deqUFv8j2Zf80/AGTYsWLejdu3etT2qWl5fXuP/w7rvvsnDhQj755BNuXb+ecRH66dkJVAAvEjRxbQmMAu4B7q25k8xo5loPDz/8MNOnT2fmzJlccskltGrVisWLF7NgwYKEJ/+jR4/y9NNP88tf/pKhQ4fy9ttv8+Uvfzmh+xCpoT4PA6T
ilfQunX/3u2AwBbPaH8IwC8r97nfJjSfZRo6M+Pt9FnqobXbYvBfBB0c7HiNHpvo3Sbp9+/Z5mzZt/Pnnn0/6vpYsWeKDBg3yc845x5cvX570/Unmo54PeWXflX+VSZOCbwFN4AZNo4jSzLUDQdvc8Oqg2hqmLlmxgheuvZbTTjvt+OvLX/4y7dpF7LMwLS1fvpxDhw4xevTopO1jzZo1TJkyhS1btjB9+nRGjx7dtJoES8bL3uQPQUKfPz/lN2gaRZRmrgDXAL8GhhNU+zxC0Oy1umOtW3PqqFEMOessSkpKePvttykpKWHTpk2cdNJJJ5wQwl8nn3xyUn+1RKt1LIM4R33bvn07v/zlL1m0aBG33norP/nJTxLTkkskRtnX2idb1dLapwL4OcEg062BscD9ofcniNLax93ZsWMHJSUlNV4bN24kJycn6okhPz+/yV3xLl68mJEjR3Lo0KEvTgDFxcG3xKKiYDr8OFZ9SxwxIviWOGRIjW3u37+f++67jxkzZjBx4kRuuukmdbEsSaGmnlJTCpq5eqgpa7QTg7tHPTF06dIlJSeGsrIyunfvzlNPPcV3v/vdL56KbsADPBUVFTz++OPcddddXHzxxdxzzz306tUr+jZE4qTkLzUVFwd99TSkmWteHrz+esLvfXz22WcRTwwlJSUcPHgw6omhe/fuSX3g6aGHHuL+++/n0csv5+Knn6ZleTmvAMsIvhXVKi8Pf/BBFnbvzo033kjPnj154IEHOPPMM5MWr0gVJX+JLJa+fapUNXNt5MfRy8rK2LRpU8QTw759+zj11FMjnhh69epF8+bN497/3Lvv5pE77uCvx47RDjgLuAW4GXiLL26Y9QDWV1v3ULNmXN23LxN+/WuGDx/e5Kq2JHMp+Ut0cVRjNBUHDhxg8+bNEU8MpaWl9OnTJ+KJ4ZRTTqn/DdYo1WTDCIb9vLaWVY+ZweWX0+wPf2jgbyjSMEr+Ursm0g9JMpSXl7Nly5aIJ4ZPP/2Unj17Rjwx9O3bl5yc0CgOtdwgH0bdyR9QdxiSEkr+Uj/Z0Mw1zJEjR/joo48inhi2bt1K165dOe2005h04ACXrVpFywhPRQ8DPiB4Ou50giehh0XaWW4u3HlnWjzqL5lDfftI/aRJPySJ0qpVK/r370///v1rLKusrGTr1q2UlJRwyq23Rkz8APcBAwgGrJ4HfAdYDdTokCGLusOQ9KP+YUVCWrRowamnnsrFF1/M6V26RC33daAdwTCfVwPnAIuiFd67N9FhiiSEkr9IJDE8gGUEVUARdeiQiGhEEk7JXySSQYOCG7bV7AOWEHR9XQnMBd4g6Bqjhtzc4P6JSBOk5C8SyYQJEWdXALcCnYCOBH0iFQI17yAQtJiKsh2RVFPyF4mkc+egr55qD2d1Ihi0ej/Bt4C3gIsirW8WNJXNwBZTkhmU/EWimTo1qLppiCSM+iaSSEr+ItFUjfqWlxfbeikc9U2kvtTOX6Q2Vd1apHl3GCLV6cpfpC6TJgU9mo4eHbQAql4VlJsbzB89OiinxC9pQFf+IvVRbdS3ebfeypjzz6dV584Z3R2GZC717SPSAG3btmXnzp20adMm1aGInKC+ffuo2kekAY4cOUKrVq1SHYZIgyn5i8TI3amsrIw8wLtImogr+ZvZl8xsqZltDP2s0ZGJmQ02s+Vm9oGZrTGz78WzT5FUq6iooEWLFhqdS9JavFf+NwGvuns/4NXQdHUHgR+4+xkEXaD8p5mdHOd+RVJGVT6SCeJN/qOAp0LvnwIur17A3Te4+8bQ+0+BUoKn5EXSkpK/ZIJ4k38Xd98Rev83IHon6ICZnU0wBsamOPcrkjJK/pIJ6rxjZWavAF0jLLolfMLd3cyiths1s27A08DV7n4sSpmJwESA3r171xWaSEpUVFQo+UvaqzP5u/uF0ZaZ2U4z6+buO0LJvTRKuZOA/wVucfe3atnXLGAWBO3864pNJBV05S+ZIN5qn4UEI9kR+rm
gegEzawW8BMxx9xfj3J9Iyin5SyaIN/lPBy4ys43AhaFpzKzAzB4PlRkLnAdMMLPVodfgOPcrkjJK/pIJ4npKxd33ABdEmL8SuDb0/hngmXj2I9KUKPlLJtATviIxOnLkCC1btkx1GCJxUfIXiZGu/CUTKPmLxEjJXzKBkr9IjJT8JRMo+YvESMlfMoGSv0iM9ISvZAIlf5EY6cpfMoGSv0iMlPwlEyj5i8RI7fwlEyj5i8RIV/6SCZT8RWKk5C+ZQMlfJEZK/pIJlPxFYqTkL5lAyV8kRkr+kgmU/EVipOQvmUDJXyRGesJXMoGSv0iMdOUvmUDJXyRGeshLMoGSv0iMdOUvmUDJXyRGSv6SCZT8RWKk5C+ZQMlfJEZK/pIJlPxFYqTkL5lAyV8kRkr+kgmU/EVipOQvmUDJXyRGesJXMkFcyd/MvmRmS81sY+hnh1rKnmRm28zsN/HsUyTV9JCXZIJ4r/xvAl51937Aq6HpaO4G3ohzfyIpp2ofyQTxJv9RwFOh908Bl0cqZGZnAV2Al+Pcn0jKKflLJog3+Xdx9x2h938jSPAnMLNmwEPA5Lo2ZmYTzWylma3ctWtXnKGJJIeSv2SCFnUVMLNXgK4RFt0SPuHubmYeodzPgEXuvs3Mat2Xu88CZgEUFBRE2pZIyin5SyaoM/m7+4XRlpnZTjPr5u47zKwbUBqh2FDgXDP7GdAWaGVmB9y9tvsDIk2Wkr9kgjqTfx0WAlcD00M/F1Qv4O7jq96b2QSgQIlf0pmSv2SCeOv8pwMXmdlG4MLQNGZWYGaPxxucSFPj7mrqKRkhrit/d98DXBBh/krg2gjzZwOz49mnSCodPXqUZs2a0bx581SHIhIXPeErEoOKigpd9UtGUPIXiYHq+yVTKPmLxEDJXzKFkr9IDJT8JVMo+YvEQMlfMoWSv0gMlPwlU8T7kJdIdigthdmz6fzGGzz66adw5ZUwaBBccw106pTq6ERiZu5NswudgoICX7lyZarDkGxXXAzTpkFRUTB96NAXy3JzwR1GjICpU2HIkNTEKBLGzFa5e0Fd5VTtIxLNjBkwbBgUFgZJPzzxA5SXB/MKC4NyM2akIkqRBlG1j0gkM2bA5Mlw8GDdZd2DcpNDvZZPmpSE88wLAAAJnUlEQVTc2EQSQFf+ItUVF9dI/IeBHwGnAO2AwUBR9fWqTgCqrpQ0oOQvUt20aUGVTphKoBfwOlAG3AOMBT6qvm55ebC+SBOn5C8SrrQ0uLlbrSFEG+AOoA/BP81IoC+wqvr67rBoEWgkOmnilPxFws2eXa9iO4ENwBmRFprVezsiqaLkLxJuzZqarXqqqQDGE4xe9JVIBcrLYe3axMcmkkBK/iLhyspqXXwMuApoBfymtoJ79yYuJpEkUFNPkXDt20dd5AQtfnYCi4Bae/Xv0CGhYYkkmq78RcINGgStW0dcNAn4K/BHILe2beTmwsCBiY9NJIGU/EXCTZgQcfbHwKPAaqAr0Db0mhupsHvU7Yg0FUr+IuE6dw766jE7YfYpBNU+h4ADYa/x1dc3g0svVWdv0uQp+UvWmT17NgMHDiQvL4+uXbsyadIk9u3b90WBqVODqpuGyM0N1hdp4pT8Jas89NBD3HjjjTzwwAOUlZXx1ltv8fHHH3PRRRdx5MiRoNCQIfDgg5CXF9vG8/KC9Qrq7FBRJOWU/CVrfP7559x+++38+te/Zvjw4bRs2ZI+ffrw/PPP89FHH/HMM898UXjSpC9OANWqgGow+yLxq1M3SRNK/pI1/u///o9Dhw4xZsyYE+a3bduWSy+9lKVLl564wqRJ8PrrMHp00AKoelVQbm4wf/TooJwSv6QRtfOXrLF79246duxIixY1P/bdunVj1aoaPfUEVTjz5wd99cyeHTy5u3dv0I5/4MCgVY9u7koaUvKXrNGxY0d2795NZWVljRPAjh076NixY/SVO3WCKVOSHKFI44mr2sfMvmRmS81sY+hnxMcazay3mb1
sZn81s3Vm1iee/Yo0xNChQ8nJyeEPf/jDCfMPHDhAUVERF1xwQYoiE2l88db53wS86u79gFdD05HMAR5w968CZwOlce5XJGbt27fn9ttv5/rrr2fx4sVUVFTw0UcfMXbsWHr27MlVV12V6hBFGk281T6jgGGh908BrwE3hhcwswFAC3dfCuDuB+Lcp0iD3XDDDeTn5zN58mQ2bdrESSedxOWXX87cuXPJyclJdXgijca82qAVMa1sts/dTw69N2Bv1XRYmcuBa4EjBONfvALc5O5HI2xvIjARoHfv3md9/PHHDY5NRCQbmdkqd6/zYZM6r/zN7BWC7kyquyV8wt3dzCKdSVoA5wJnAluB54AJwO+rF3T3WcAsgIKCgoaflUREpFZ1Jn93vzDaMjPbaWbd3H2HmXUjcl3+NmC1u28OrVMIfIMIyV9ERBpHvDd8FxIMaETo54IIZYqBk82sqjH0PwHr4tyviIjEId7kPx24yMw2AheGpjGzAjN7HCBUtz8ZeNXM1gIGPBbnfkVEJA5x3fBNJjPbRdCNemPrCOxOwX7rorhi01TjgqYbm+KKXVOM7RR3r/Ox8yab/FPFzFbW5055Y1NcsWmqcUHTjU1xxa4px1YXdewmIpKFlPxFRLKQkn9Ns1IdQBSKKzZNNS5ourEprtg15dhqpTp/EZEspCt/EZEslHXJ38yuMLMPzOyYmUW9S29mw81svZmVmNlNYfP7mtnbofnPmVmrBMZWZxfZZna+ma0Oex0K9Z+Emc02sy1hywY3VlyhckfD9r0wbH5Sjlk9j9dgM1se+puvMbPvhS1L6PGK9pkJW54T+v1LQsejT9iyqaH5683sknjiaEBc/xHqan2Nmb1qZqeELYv4N23E2CaY2a6wGK4NW3Z16G+/0cyurr5ukuN6JCymDWa2L2xZUo9Zwrh7Vr2ArwKnE/RAWhClTHNgE3Aq0Ap4DxgQWvY8MC70fiYwKYGx3U/Q6R0E3WPfV0f5LwGfAXmh6dnAd5NwzOoVF3AgyvykHLP6xAX0B/qF3ncHdgAnJ/p41faZCSvzM2Bm6P044LnQ+wGh8jkEnR9uApo3Ylznh32GJlXFVdvftBFjmwD8JsK6XwI2h352CL3v0FhxVSt/PfBEYxyzRL6y7srf3f/q7uvrKHY2UOLum939CDAPGGVmRtA9xYuhck8BlycwvFGhbdZ3298Fitz9YAJjiCTWuI5L8jGrMy533+DuG0PvPyXofyoZ4y5G/MzUEu+LwAWh4zMKmOfuh919C1AS2l6jxOXuy8I+Q28BPRO077hjq8UlwFJ3/8zd9wJLgeEpiuv7wH8naN+NJuuSfz31AD4Jm94WmpcP7HP3ymrzE6WLu+8Ivf8b0KWO8uOo+aG7N/T1/REzS1QH9fWNq7WZrTSzt6qqokjuMYvpeJnZ2QRXcpvCZifqeEX7zEQsEzoeZQTHpz7rJjOucD8CisKmI/1NE6W+sf1z6G/0opn1inHdZMZFqIqsL/CnsNnJPGYJk5Fj+Fot3VC7e6TO5xpNbbGFT7hH7SK7ajvdgIHAkrDZUwmSYCuCJmg3Anc1YlynuPt2MzsV+JMFfTmV1Wf/SY6r6ng9DVzt7sdCsxt8vDKRmV0JFADfCptd42/q7psibyEp/gj8t7sfNrOfEHxz+qdG3H9dxgEv+onjk6T6mNVLRiZ/r6Ub6nraDvQKm+4ZmreHoIfSFqErt6r5CYnN6tdFdpWxwEvuXhG27aqr4MNm9iRBh3qNFpe7bw/93GxmrxGM4TCfOI5ZIuIys5OA/yU4+b8Vtu0GH68Ion1mIpXZZmYtgPYEn6n6rJvMuDCzCwlOqN9y98NV86P8TROVyOqMzd33hE0+TnCfp2rdYdXWfa2x4gozDrgufEaSj1nCqNonsmKgnwWtVFoR/IEXenA3ZxlBXTtE78a6oerTRXaVGvWMoQRYVc9+OfB+Y8VlZh2qqk3MrCNwDrAuycesPnG1Al4C5rj7i9WWJfJ
4RfzM1BLvd4E/hY7PQmBcqDVQX6AfsCKOWGKKy8zOBB4FLnP30rD5Ef+mCYqrvrF1C5u8DPhr6P0S4OJQjB2AiznxW3BS4wrF9hWCm83Lw+Yl+5glTqrvODf2CxhNUId3GNgJLAnN7w4sCit3KbCB4Ix9S9j8Uwn+MUuAF4CcBMaWD7wKbCQY7vJLofkFwONh5foQXIk0q7b+n4C1BEnsGaBtY8UF/GNo3++Ffv4o2cesnnFdCVQAq8Neg5NxvCJ9ZgiqkS4LvW8d+v1LQsfj1LB1bwmttx4YkeDPfF1xvRL6X6g6Pgvr+ps2YmzTgA9CMSwDvhK27g9Dx7IEuKYx4wpN3wFMr7Ze0o9Zol56wldEJAup2kdEJAsp+YuIZCElfxGRLKTkLyKShZT8RUSykJK/iEgWUvIXEclCSv4iIlno/wNHn92ZBgkRVQAAAABJRU5ErkJggg==\n",
  927. "text/plain": [
  928. "<matplotlib.figure.Figure at 0x7f7788e0e390>"
  929. ]
  930. },
  931. "metadata": {},
  932. "output_type": "display_data"
  933. },
  934. {
  935. "name": "stdout",
  936. "output_type": "stream",
  937. "text": [
  938. "[(0, {'label': 'CC'}), (1, {'label': 'CC'}), (2, {'label': 'CC'}), (3, {'label': 'CO'}), (4, {'label': 'CCCC'}), (5, {'label': 'CCCO'}), (6, {'label': 'OCC'})]\n",
  939. " -> \n"
  940. ]
  941. },
  942. {
  943. "data": {
  944. "image/png": "\n",
  945. "text/plain": [
  946. "<matplotlib.figure.Figure at 0x7f773c95a5f8>"
  947. ]
  948. },
  949. "metadata": {},
  950. "output_type": "display_data"
  951. },
  952. {
  953. "name": "stdout",
  954. "output_type": "stream",
  955. "text": [
  956. "[(0, {'label': '0'}), (1, {'label': '0'}), (2, {'label': '0'}), (3, {'label': '3'}), (4, {'label': '4'}), (5, {'label': '1'}), (6, {'label': '2'})]\n",
  957. "--- shortest path kernel built in 0.00026607513427734375 seconds ---\n",
  958. "6\n"
  959. ]
  960. }
  961. ],
  962. "source": [
  963. "import sys\n",
  964. "import networkx as nx\n",
  965. "sys.path.insert(0, \"../\")\n",
  966. "from pygraph.utils.graphfiles import loadDataset\n",
  967. "from pygraph.kernels.spkernel import spkernel\n",
  968. "\n",
  969. "import matplotlib.pyplot as plt\n",
  970. "\n",
  971. "\n",
  972. "def weisfeilerlehman_test(G):\n",
  973. " '''\n",
  974. " Weisfeiler-Lehman test of graph isomorphism.\n",
  975. " '''\n",
  976. "\n",
  977. " nx.draw_networkx(G)\n",
  978. " plt.show()\n",
  979. " nx.draw_networkx_labels(G, nx.spring_layout(G), labels = nx.get_node_attributes(G,'label'))\n",
  980. " print(G.nodes(data = True))\n",
  981. " \n",
  982. " set_multisets = []\n",
  983. " for node in G.nodes(data = True):\n",
  984. " # Multiset-label determination.\n",
  985. " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n",
  986. " # sorting each multiset\n",
  987. " multiset.sort()\n",
  988. " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n",
  989. " set_multisets.append(multiset)\n",
  990. " \n",
  991. " # label compression\n",
  992. "# set_multisets.sort() # this is unnecessary\n",
  993. " set_unique = list(set(set_multisets)) # set of unique multiset labels\n",
  994. " set_compressed = { value : str(set_unique.index(value)) for value in set_unique } # assign indices as the new labels\n",
  995. "# print(set_compressed)\n",
  996. "# print(set_multisets)\n",
  997. " \n",
  998. " # relabel nodes with multisets\n",
  999. " for node in G.nodes(data = True):\n",
  1000. " node[1]['label'] = set_multisets[node[0]]\n",
  1001. " print(' -> ')\n",
  1002. " nx.draw_networkx(G)\n",
  1003. " plt.show()\n",
  1004. " print(G.nodes(data = True))\n",
  1005. "\n",
  1006. " \n",
  1007. " # relabel nodes\n",
  1008. " for node in G.nodes(data = True):\n",
  1009. " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n",
  1010. " \n",
  1011. " print(' -> ')\n",
  1012. " nx.draw_networkx(G)\n",
  1013. " plt.show()\n",
  1014. " print(G.nodes(data = True))\n",
  1015. "\n",
  1016. "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
  1017. "G1 = dataset[12]\n",
  1018. "G2 = dataset[55]\n",
  1019. "\n",
  1020. "# init.\n",
  1021. "kernel = 0 # init kernel\n",
  1022. "num_nodes1 = G1.number_of_nodes()\n",
  1023. "num_nodes2 = G2.number_of_nodes()\n",
  1024. "\n",
  1025. "# the first iteration.\n",
  1026. "labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
  1027. "labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
  1028. "print(labelset1)\n",
  1029. "print(labelset2)\n",
  1030. "kernel += spkernel(G1, G2)\n",
  1031. "print(kernel)\n",
  1032. "\n",
  1033. "\n",
  1034. "\n",
  1035. "for height in range(0, min(num_nodes1, num_nodes2)): #Q how to determine the upper bound of the height?\n",
  1036. " if labelset1 != labelset2:\n",
  1037. " break\n",
  1038. " \n",
  1039. " # Weisfeiler-Lehman test of graph isomorphism.\n",
  1040. " weisfeilerlehman_test(G1)\n",
  1041. " weisfeilerlehman_test(G2)\n",
  1042. " \n",
  1043. " # calculate kernel\n",
  1044. " kernel += spkernel(G1, G2)\n",
  1045. " \n",
  1046. " # get label sets of both graphs\n",
  1047. " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
  1048. " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
  1049. "# print(labelset1)\n",
  1050. "# print(labelset2)\n",
  1051. "\n",
  1052. "print(kernel)"
  1053. ]
  1054. },
  1055. {
  1056. "cell_type": "code",
  1057. "execution_count": 20,
  1058. "metadata": {
  1059. "scrolled": false
  1060. },
  1061. "outputs": [
  1062. {
  1063. "name": "stdout",
  1064. "output_type": "stream",
  1065. "text": [
  1066. "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}\n",
  1067. "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'C', 6: 'S', 7: 'S'}\n",
  1068. "\n",
  1069. " --- height = 0 --- \n",
  1070. "\n",
  1071. " --- for graph 0 --- \n",
  1072. "\n",
  1073. "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n",
  1074. "all_labels_ori: {'C', 'O'}\n",
  1075. "num_of_each_label: {'C': 5, 'O': 2}\n",
  1076. "all_num_of_each_label: [{'C': 5, 'O': 2}]\n",
  1077. "num_of_labels: 2\n",
  1078. "all_labels_ori: {'C', 'O'}\n",
  1079. "\n",
  1080. " --- for graph 1 --- \n",
  1081. "\n",
  1082. "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n",
  1083. "all_labels_ori: {'C', 'O', 'S'}\n",
  1084. "num_of_each_label: {'C': 6, 'S': 2}\n",
  1085. "all_num_of_each_label: [{'C': 5, 'O': 2}, {'C': 6, 'S': 2}]\n",
  1086. "num_of_labels: 2\n",
  1087. "all_labels_ori: {'C', 'O', 'S'}\n",
  1088. "\n",
  1089. " all_num_of_labels_occured: 3\n",
  1090. "\n",
  1091. " --- calculating kernel matrix ---\n",
  1092. "\n",
  1093. " labels: {'C', 'O'}\n",
  1094. "vector1: [[5 2]]\n",
  1095. "vector2: [[5 2]]\n",
  1096. "Kmatrix: [[ 29. 0.]\n",
  1097. " [ 0. 0.]]\n",
  1098. "\n",
  1099. " labels: {'C', 'O', 'S'}\n",
  1100. "vector1: [[5 2 0]]\n",
  1101. "vector2: [[6 0 2]]\n",
  1102. "Kmatrix: [[ 29. 30.]\n",
  1103. " [ 30. 0.]]\n",
  1104. "\n",
  1105. " labels: {'C', 'S'}\n",
  1106. "vector1: [[6 2]]\n",
  1107. "vector2: [[6 2]]\n",
  1108. "Kmatrix: [[ 29. 30.]\n",
  1109. " [ 30. 40.]]\n",
  1110. "\n",
  1111. " --- height = 1 --- \n",
  1112. "\n",
  1113. " --- for graph 0 --- \n",
  1114. "\n",
  1115. "multiset: ['CC', 'CC', 'CCO', 'CCO', 'COO', 'OCC', 'OCC']\n",
  1116. "set_unique: ['OCC', 'COO', 'CCO', 'CC']\n",
  1117. "set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n",
  1118. "all_set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n",
  1119. "num_of_labels_occured: 7\n",
  1120. "\n",
  1121. " compressed labels: {0: '7', 1: '7', 2: '6', 3: '6', 4: '5', 5: '4', 6: '4'}\n",
  1122. "labels_comp: ['7', '7', '6', '6', '5', '4', '4']\n",
  1123. "all_labels_ori: {'5', '4', '6', '7'}\n",
  1124. "num_of_each_label: {'5': 1, '4': 2, '6': 2, '7': 2}\n",
  1125. "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}]\n",
  1126. "\n",
  1127. " --- for graph 1 --- \n",
  1128. "\n",
  1129. "multiset: ['CC', 'CC', 'CC', 'CCS', 'CCS', 'CCSS', 'SCC', 'SCC']\n",
  1130. "set_unique: ['SCC', 'CC', 'CCS', 'CCSS']\n",
  1131. "set_compressed: {'SCC': '8', 'CC': '7', 'CCS': '9', 'CCSS': '10'}\n",
  1132. "all_set_compressed: {'SCC': '8', 'COO': '5', 'CCS': '9', 'OCC': '4', 'CCO': '6', 'CCSS': '10', 'CC': '7'}\n",
  1133. "num_of_labels_occured: 10\n",
  1134. "\n",
  1135. " compressed labels: {0: '7', 1: '7', 2: '7', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n",
  1136. "labels_comp: ['7', '7', '7', '9', '9', '10', '8', '8']\n",
  1137. "all_labels_ori: {'10', '4', '7', '9', '6', '5', '8'}\n",
  1138. "num_of_each_label: {'10': 1, '9': 2, '7': 3, '8': 2}\n",
  1139. "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}, {'10': 1, '9': 2, '7': 3, '8': 2}]\n",
  1140. "\n",
  1141. " all_num_of_labels_occured: 10\n",
  1142. "\n",
  1143. " --- calculating kernel matrix ---\n",
  1144. "\n",
  1145. " labels: {'5', '4', '6', '7'}\n",
  1146. "vector1: [[1 2 2 2]]\n",
  1147. "vector2: [[1 2 2 2]]\n",
  1148. "\n",
  1149. " labels: {'10', '4', '7', '9', '6', '5', '8'}\n",
  1150. "vector1: [[0 2 2 0 2 1 0]]\n",
  1151. "vector2: [[1 0 3 2 0 0 2]]\n",
  1152. "\n",
  1153. " labels: {'8', '10', '7', '9'}\n",
  1154. "vector1: [[2 1 3 2]]\n",
  1155. "vector2: [[2 1 3 2]]\n",
  1156. "\n",
  1157. " Kmatrix: [[ 42. 36.]\n",
  1158. " [ 36. 58.]]\n",
  1159. "\n",
  1160. " --- height = 2 --- \n",
  1161. "\n",
  1162. " --- for graph 0 --- \n",
  1163. "\n",
  1164. "multiset: ['76', '76', '647', '647', '544', '456', '456']\n",
  1165. "set_unique: ['647', '76', '456', '544']\n",
  1166. "set_compressed: {'647': '11', '76': '12', '544': '14', '456': '13'}\n",
  1167. "all_set_compressed: {'647': '11', '76': '12', '456': '13', '544': '14'}\n",
  1168. "num_of_labels_occured: 14\n",
  1169. "\n",
  1170. " compressed labels: {0: '12', 1: '12', 2: '11', 3: '11', 4: '14', 5: '13', 6: '13'}\n",
  1171. "labels_comp: ['12', '12', '11', '11', '14', '13', '13']\n",
  1172. "all_labels_ori: {'14', '12', '11', '13'}\n",
  1173. "num_of_each_label: {'14': 1, '13': 2, '12': 2, '11': 2}\n",
  1174. "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}]\n",
  1175. "\n",
  1176. " --- for graph 1 --- \n",
  1177. "\n",
  1178. "multiset: ['79', '79', '710', '978', '978', '10788', '8109', '8109']\n",
  1179. "set_unique: ['710', '8109', '79', '10788', '978']\n",
  1180. "set_compressed: {'710': '15', '79': '17', '8109': '16', '978': '19', '10788': '18'}\n",
  1181. "all_set_compressed: {'710': '15', '79': '17', '978': '19', '10788': '18', '8109': '16', '456': '13', '544': '14', '647': '11', '76': '12'}\n",
  1182. "num_of_labels_occured: 19\n",
  1183. "\n",
  1184. " compressed labels: {0: '17', 1: '17', 2: '15', 3: '19', 4: '19', 5: '18', 6: '16', 7: '16'}\n",
  1185. "labels_comp: ['17', '17', '15', '19', '19', '18', '16', '16']\n",
  1186. "all_labels_ori: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n",
  1187. "num_of_each_label: {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}\n",
  1188. "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}, {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}]\n",
  1189. "\n",
  1190. " all_num_of_labels_occured: 19\n",
  1191. "\n",
  1192. " --- calculating kernel matrix ---\n",
  1193. "\n",
  1194. " labels: {'14', '12', '11', '13'}\n",
  1195. "vector1: [[1 2 2 2]]\n",
  1196. "vector2: [[1 2 2 2]]\n",
  1197. "\n",
  1198. " labels: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n",
  1199. "vector1: [[0 0 2 2 0 2 1 0 0]]\n",
  1200. "vector2: [[1 2 0 0 2 0 0 2 1]]\n",
  1201. "\n",
  1202. " labels: {'18', '17', '15', '16', '19'}\n",
  1203. "vector1: [[1 2 1 2 2]]\n",
  1204. "vector2: [[1 2 1 2 2]]\n",
  1205. "\n",
  1206. " Kmatrix: [[ 55. 36.]\n",
  1207. " [ 36. 72.]]\n",
  1208. "\n",
  1209. " --- Weisfeiler-Lehman subtree kernel built in 0.0034377574920654297 seconds ---\n"
  1210. ]
  1211. },
  1212. {
  1213. "data": {
  1214. "text/plain": [
  1215. "array([[ 55., 36.],\n",
  1216. " [ 36., 72.]])"
  1217. ]
  1218. },
  1219. "execution_count": 20,
  1220. "metadata": {},
  1221. "output_type": "execute_result"
  1222. }
  1223. ],
  1224. "source": [
  1225. "# test of WL subtree kernel on many graphs\n",
  1226. "\n",
  1227. "import sys\n",
  1228. "import pathlib\n",
  1229. "from collections import Counter\n",
  1230. "sys.path.insert(0, \"../\")\n",
  1231. "\n",
  1232. "import networkx as nx\n",
  1233. "import numpy as np\n",
  1234. "import time\n",
  1235. "\n",
  1236. "from pygraph.kernels.spkernel import spkernel\n",
  1237. "from pygraph.kernels.pathKernel import pathkernel\n",
  1238. "\n",
  1239. "def weisfeilerlehmankernel(*args, height = 0, base_kernel = 'subtree'):\n",
  1240. " \"\"\"Calculate Weisfeiler-Lehman kernels between graphs.\n",
  1241. " \n",
  1242. " Parameters\n",
  1243. " ----------\n",
  1244. " Gn : List of NetworkX graph\n",
  1245. " List of graphs between which the kernels are calculated.\n",
  1246. " /\n",
  1247. " G1, G2 : NetworkX graphs\n",
  1248. " 2 graphs between which the kernel is calculated.\n",
  1249. " \n",
  1250. " height : subtree height\n",
  1251. " \n",
  1252. " base_kernel : base kernel used in each iteration of WL kernel\n",
  1253. " the default base kernel is subtree kernel\n",
  1254. " \n",
  1255. " Return\n",
  1256. " ------\n",
  1257. " Kmatrix/Kernel : Numpy matrix/int\n",
  1258. " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs. / Weisfeiler-Lehman Kernel between 2 graphs.\n",
  1259. " \n",
  1260. " Notes\n",
  1261. " -----\n",
  1262. " This function now supports WL subtree kernel and WL shortest path kernel.\n",
  1263. " \n",
  1264. " References\n",
  1265. " ----------\n",
  1266. " [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61.\n",
  1267. " \"\"\"\n",
  1268. " if len(args) == 1: # for a list of graphs\n",
  1269. "\n",
  1270. "# print(args)\n",
  1271. " start_time = time.time()\n",
  1272. " \n",
  1273. " # for WL subtree kernel\n",
  1274. " if base_kernel == 'subtree': \n",
  1275. " Kmatrix = _wl_subtreekernel_do(args[0], height = height, base_kernel = 'subtree')\n",
  1276. " \n",
  1277. " # for WL edge kernel\n",
  1278. " elif base_kernel == 'edge':\n",
  1279. " print('edge')\n",
  1280. " \n",
  1281. " # for WL shortest path kernel\n",
  1282. " elif base_kernel == 'sp':\n",
  1283. " Gn = args[0]\n",
  1284. " Kmatrix = np.zeros((len(Gn), len(Gn)))\n",
  1285. " \n",
  1286. " for i in range(0, len(Gn)):\n",
  1287. " for j in range(i, len(Gn)):\n",
  1288. " Kmatrix[i][j] = _weisfeilerlehmankernel_do(Gn[i], Gn[j])\n",
  1289. " Kmatrix[j][i] = Kmatrix[i][j]\n",
  1290. "\n",
  1291. " print(\"\\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---\" % (base_kernel, len(args[0]), (time.time() - start_time)))\n",
  1292. " \n",
  1293. " return Kmatrix\n",
  1294. " \n",
  1295. " else: # for only 2 graphs\n",
  1296. " \n",
  1297. " start_time = time.time()\n",
  1298. " \n",
  1299. " # for WL subtree kernel\n",
  1300. " if base_kernel == 'subtree':\n",
  1301. " \n",
  1302. " args = [args[0], args[1]]\n",
  1303. "# print(args)\n",
  1304. " kernel = _wl_subtreekernel_do(args, height = height, base_kernel = 'subtree')\n",
  1305. " \n",
  1306. " # for WL edge kernel\n",
  1307. " elif base_kernel == 'edge':\n",
  1308. " print('edge')\n",
  1309. " \n",
  1310. " # for WL shortest path kernel\n",
  1311. " elif base_kernel == 'sp':\n",
  1312. " \n",
  1313. "\n",
  1314. " kernel = _pathkernel_do(args[0], args[1])\n",
  1315. "\n",
  1316. " print(\"\\n --- Weisfeiler-Lehman %s kernel built in %s seconds ---\" % (base_kernel, time.time() - start_time))\n",
  1317. " \n",
  1318. " return kernel\n",
  1319. " \n",
  1320. " \n",
  1321. "def _weisfeilerlehmankernel_do(G1, G2):\n",
  1322. " \"\"\"Calculate Weisfeiler-Lehman kernels between 2 graphs. This kernel uses the shortest path kernel to calculate the kernel between two graphs in each iteration.\n",
  1323. " \n",
  1324. " Parameters\n",
  1325. " ----------\n",
  1326. " G1, G2 : NetworkX graphs\n",
  1327. " 2 graphs between which the kernel is calculated.\n",
  1328. " \n",
  1329. " Return\n",
  1330. " ------\n",
  1331. " Kernel : int\n",
  1332. " Weisfeiler-Lehman Kernel between 2 graphs.\n",
  1333. " \"\"\"\n",
  1334. " \n",
  1335. " # init.\n",
  1336. " kernel = 0 # init kernel\n",
  1337. " num_nodes1 = G1.number_of_nodes()\n",
  1338. " num_nodes2 = G2.number_of_nodes()\n",
  1339. " height = 12 #min(num_nodes1, num_nodes2)) #Q how to determine the upper bound of the height?\n",
  1340. " \n",
  1341. " # the first iteration.\n",
  1342. " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
  1343. " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
  1344. " kernel += pathkernel(G1, G2) # change your base kernel here (and one more below)\n",
  1345. " \n",
  1346. " for h in range(0, height):\n",
  1347. "# if labelset1 != labelset2:\n",
  1348. "# break\n",
  1349. "\n",
  1350. " # Weisfeiler-Lehman test of graph isomorphism.\n",
  1351. " relabel(G1)\n",
  1352. " relabel(G2)\n",
  1353. "\n",
  1354. " # calculate kernel\n",
  1355. " kernel += pathkernel(G1, G2) # change your base kernel here (and one more before)\n",
  1356. "\n",
  1357. " # get label sets of both graphs\n",
  1358. " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
  1359. " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
  1360. " \n",
  1361. " return kernel\n",
  1362. "\n",
  1363. "\n",
  1364. "def relabel(G):\n",
  1365. " '''\n",
  1366. " Relabel nodes in graph G in one iteration of the 1-dim. WL test of graph isomorphism.\n",
  1367. " \n",
  1368. " Parameters\n",
  1369. " ----------\n",
  1370. " G : NetworkX graph\n",
  1371. " The graphs whose nodes are relabeled.\n",
  1372. " '''\n",
  1373. " \n",
  1374. " # get the set of original labels\n",
  1375. " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
  1376. " print(labels_ori)\n",
  1377. " num_of_each_label = dict(Counter(labels_ori))\n",
  1378. " print(num_of_each_label)\n",
  1379. " num_of_labels = len(num_of_each_label)\n",
  1380. " print(num_of_labels)\n",
  1381. " \n",
  1382. " set_multisets = []\n",
  1383. " for node in G.nodes(data = True):\n",
  1384. " # Multiset-label determination.\n",
  1385. " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n",
  1386. " # sorting each multiset\n",
  1387. " multiset.sort()\n",
  1388. " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n",
  1389. " set_multisets.append(multiset)\n",
  1390. " print(set_multisets)\n",
  1391. " \n",
  1392. " # label compression\n",
  1393. "# set_multisets.sort() # this is unnecessary\n",
  1394. " set_unique = list(set(set_multisets)) # set of unique multiset labels\n",
  1395. " print(set_unique)\n",
  1396. " set_compressed = { value : str(set_unique.index(value) + num_of_labels + 1) for value in set_unique } # assign new labels\n",
  1397. " print(set_compressed)\n",
  1398. " \n",
  1399. " # relabel nodes\n",
  1400. "# nx.relabel_nodes(G, set_compressed, copy = False)\n",
  1401. " for node in G.nodes(data = True):\n",
  1402. " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n",
  1403. " print(nx.get_node_attributes(G, 'label'))\n",
  1404. "\n",
  1405. " # get the set of compressed labels\n",
  1406. " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n",
  1407. " print(labels_comp)\n",
  1408. " num_of_each_label.update(dict(Counter(labels_comp)))\n",
  1409. " print(num_of_each_label)\n",
  1410. " \n",
  1411. " \n",
  1412. "def _wl_subtreekernel_do(*args, height = 0, base_kernel = 'subtree'):\n",
  1413. " \"\"\"Calculate Weisfeiler-Lehman subtree kernels between graphs.\n",
  1414. " \n",
  1415. " Parameters\n",
  1416. " ----------\n",
  1417. " Gn : List of NetworkX graph\n",
  1418. " List of graphs between which the kernels are calculated.\n",
  1419. " \n",
  1420. " Return\n",
  1421. " ------\n",
  1422. " Kmatrix/Kernel : Numpy matrix/int\n",
  1423. " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.\n",
  1424. " \"\"\"\n",
  1425. " \n",
  1426. "# print(args)\n",
  1427. " Gn = args[0]\n",
  1428. "# print(Gn)\n",
  1429. "\n",
  1430. " Kmatrix = np.zeros((len(Gn), len(Gn)))\n",
  1431. " all_num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs\n",
  1432. " \n",
  1433. " # initial for height = 0\n",
  1434. " print('\\n --- height = 0 --- ')\n",
  1435. " all_labels_ori = set() # all unique original labels in all graphs in this iteration\n",
  1436. " all_num_of_each_label = [] # number of occurrences of each label in each graph in this iteration\n",
  1437. " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n",
  1438. " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n",
  1439. "\n",
  1440. " # for each graph\n",
  1441. " for idx, G in enumerate(Gn):\n",
  1442. " # get the set of original labels\n",
  1443. " print('\\n --- for graph %d --- \\n' % (idx))\n",
  1444. " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
  1445. " print('labels_ori: %s' % (labels_ori))\n",
  1446. " all_labels_ori.update(labels_ori)\n",
  1447. " print('all_labels_ori: %s' % (all_labels_ori))\n",
  1448. " num_of_each_label = dict(Counter(labels_ori)) # number of occurrences of each label in graph\n",
  1449. " print('num_of_each_label: %s' % (num_of_each_label))\n",
  1450. " all_num_of_each_label.append(num_of_each_label)\n",
  1451. " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n",
  1452. " num_of_labels = len(num_of_each_label) # number of all unique labels\n",
  1453. " print('num_of_labels: %s' % (num_of_labels))\n",
  1454. " \n",
  1455. "\n",
  1456. " all_labels_ori.update(labels_ori)\n",
  1457. " print('all_labels_ori: %s' % (all_labels_ori))\n",
  1458. " \n",
  1459. " all_num_of_labels_occured += len(all_labels_ori)\n",
  1460. " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n",
  1461. " \n",
  1462. " # calculate subtree kernel with the 0th iteration and add it to the final kernel\n",
  1463. " print('\\n --- calculating kernel matrix ---')\n",
  1464. " for i in range(0, len(Gn)):\n",
  1465. " for j in range(i, len(Gn)):\n",
  1466. " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n",
  1467. " print('\\n labels: %s' % (labels))\n",
  1468. " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n",
  1469. " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n",
  1470. " print('vector1: %s' % (vector1))\n",
  1471. " print('vector2: %s' % (vector2))\n",
  1472. " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n",
  1473. " Kmatrix[j][i] = Kmatrix[i][j]\n",
  1474. " print('Kmatrix: %s' % (Kmatrix))\n",
  1475. "\n",
  1476. " \n",
  1477. " # iterate each height\n",
  1478. " for h in range(1, height + 1):\n",
  1479. " print('\\n --- height = %d --- ' % (h))\n",
  1480. " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n",
  1481. " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n",
  1482. " all_labels_ori = set()\n",
  1483. " all_num_of_each_label = []\n",
  1484. " \n",
  1485. " # for each graph\n",
  1486. " for idx, G in enumerate(Gn):\n",
  1487. "# # get the set of original labels\n",
  1488. " print('\\n --- for graph %d --- \\n' % (idx))\n",
  1489. "# labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
  1490. "# print('labels_ori: %s' % (labels_ori))\n",
  1491. "# num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n",
  1492. "# print('num_of_each_label: %s' % (num_of_each_label))\n",
  1493. "# num_of_labels = len(num_of_each_label) # number of all unique labels\n",
  1494. "# print('num_of_labels: %s' % (num_of_labels))\n",
  1495. " \n",
  1496. "# all_labels_ori.update(labels_ori)\n",
  1497. "# print('all_labels_ori: %s' % (all_labels_ori))\n",
  1498. "# # num_of_labels_occured += num_of_labels #@todo not precise\n",
  1499. "# num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n",
  1500. "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n",
  1501. " \n",
  1502. " set_multisets = []\n",
  1503. " for node in G.nodes(data = True):\n",
  1504. " # Multiset-label determination.\n",
  1505. " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n",
  1506. " # sorting each multiset\n",
  1507. " multiset.sort()\n",
  1508. " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n",
  1509. " set_multisets.append(multiset)\n",
  1510. " print('multiset: %s' % (set_multisets))\n",
  1511. "\n",
  1512. " # label compression\n",
  1513. " # set_multisets.sort() # this is unnecessary\n",
  1514. " set_unique = list(set(set_multisets)) # set of unique multiset labels\n",
  1515. " print('set_unique: %s' % (set_unique))\n",
  1516. " # a dictionary mapping original labels to new ones. \n",
  1517. " set_compressed = {}\n",
  1518. " # if a label occurred before, reuse its earlier compressed label; otherwise assign (number of labels occurred so far + 1) as its compressed label \n",
  1519. " for value in set_unique:\n",
  1520. " if value in all_set_compressed.keys():\n",
  1521. " set_compressed.update({ value : all_set_compressed[value] })\n",
  1522. " else:\n",
  1523. " set_compressed.update({ value : str(num_of_labels_occured + 1) })\n",
  1524. " num_of_labels_occured += 1\n",
  1525. "# set_compressed = { value : (all_set_compressed[value] if value in all_set_compressed.keys() else str(set_unique.index(value) + num_of_labels_occured + 1)) for value in set_unique }\n",
  1526. " print('set_compressed: %s' % (set_compressed))\n",
  1527. " \n",
  1528. " all_set_compressed.update(set_compressed)\n",
  1529. " print('all_set_compressed: %s' % (all_set_compressed))\n",
  1530. "# num_of_labels_occured += len(set_compressed) #@todo not precise\n",
  1531. " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n",
  1532. " \n",
  1533. " # relabel nodes\n",
  1534. " # nx.relabel_nodes(G, set_compressed, copy = False)\n",
  1535. " for node in G.nodes(data = True):\n",
  1536. " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n",
  1537. " print('\\n compressed labels: %s' % (nx.get_node_attributes(G, 'label')))\n",
  1538. "\n",
  1539. " # get the set of compressed labels\n",
  1540. " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n",
  1541. " print('labels_comp: %s' % (labels_comp))\n",
  1542. " all_labels_ori.update(labels_comp)\n",
  1543. " print('all_labels_ori: %s' % (all_labels_ori))\n",
  1544. " num_of_each_label = dict(Counter(labels_comp))\n",
  1545. " print('num_of_each_label: %s' % (num_of_each_label))\n",
  1546. " all_num_of_each_label.append(num_of_each_label)\n",
  1547. " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n",
  1548. " \n",
  1549. " all_num_of_labels_occured += len(all_labels_ori)\n",
  1550. " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n",
  1551. " \n",
  1552. " # calculate subtree kernel with h iterations and add it to the final kernel\n",
  1553. " print('\\n --- calculating kernel matrix ---')\n",
  1554. " for i in range(0, len(Gn)):\n",
  1555. " for j in range(i, len(Gn)):\n",
  1556. " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n",
  1557. " print('\\n labels: %s' % (labels))\n",
  1558. " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n",
  1559. " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n",
  1560. " print('vector1: %s' % (vector1))\n",
  1561. " print('vector2: %s' % (vector2))\n",
  1562. " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n",
  1563. " Kmatrix[j][i] = Kmatrix[i][j]\n",
  1564. " \n",
  1565. " print('\\n Kmatrix: %s' % (Kmatrix))\n",
  1566. "\n",
  1567. " return Kmatrix\n",
  1568. "\n",
  1569. " \n",
  1570. "# main\n",
  1571. "import sys\n",
  1572. "from collections import Counter\n",
  1573. "import networkx as nx\n",
  1574. "sys.path.insert(0, \"../\")\n",
  1575. "from pygraph.utils.graphfiles import loadDataset\n",
  1576. "from pygraph.kernels.spkernel import spkernel\n",
  1577. "\n",
  1578. "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
  1579. "G1 = dataset[15]\n",
  1580. "print(nx.get_node_attributes(G1, 'label'))\n",
  1581. "G2 = dataset[80]\n",
  1582. "print(nx.get_node_attributes(G2, 'label'))\n",
  1583. "\n",
  1584. "weisfeilerlehmankernel(G1, G2, height = 2)\n",
  1585. "# Kmatrix = weisfeilerlehmankernel(G1, G2)"
  1586. ]
  1587. },
  1588. {
  1589. "cell_type": "code",
  1590. "execution_count": 4,
  1591. "metadata": {},
  1592. "outputs": [
  1593. {
  1594. "data": {
  1595. "text/plain": [
  1596. "185"
  1597. ]
  1598. },
  1599. "execution_count": 4,
  1600. "metadata": {},
  1601. "output_type": "execute_result"
  1602. }
  1603. ],
  1604. "source": [
  1605. "\n",
  1606. "len(dataset)"
  1607. ]
  1608. },
  1609. {
  1610. "cell_type": "code",
  1611. "execution_count": 1,
  1612. "metadata": {
  1613. "scrolled": true
  1614. },
  1615. "outputs": [
  1616. {
  1617. "name": "stdout",
  1618. "output_type": "stream",
  1619. "text": [
  1620. "\n",
  1621. "- This script take as input a kernel matrix\n",
  1622. "and returns the classification or regression performance\n",
  1623. "- The kernel matrix can be calculated using any of the graph kernels approaches\n",
  1624. "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n",
  1625. "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n",
  1626. "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n",
  1627. "provide the corresponding performance on the test set. If more than one split is performed, the final results \n",
  1628. "correspond to the average of the performances on the test sets. \n",
  1629. "\n",
  1630. "@references\n",
  1631. " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n",
  1632. "\n",
  1633. "\n",
  1634. " --- calculating kernel matrix when subtree height = 0 ---\n",
  1635. "\n",
  1636. " Loading dataset from file...\n",
  1637. "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n",
  1638. " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n",
  1639. " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n",
  1640. " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n",
  1641. " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n",
  1642. " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n",
  1643. " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n",
  1644. " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n",
  1645. " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n",
  1646. " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n",
  1647. " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n",
  1648. " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n",
  1649. " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n",
  1650. " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n",
  1651. " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n",
  1652. " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n",
  1653. " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n",
  1654. " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n",
  1655. "\n",
  1656. " --- This is a regression problem ---\n",
  1657. "\n",
  1658. " Calculating kernel matrix, this could take a while...\n"
  1659. ]
  1660. },
  1661. {
  1662. "ename": "KeyboardInterrupt",
  1663. "evalue": "",
  1664. "output_type": "error",
  1665. "traceback": [
  1666. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  1667. "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
  1668. "\u001b[0;32m<ipython-input-1-2ce8cff340bc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Calculating kernel matrix, this could take a while...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweisfeilerlehmankernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase_kernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'sp'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mKmatrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n Saving kernel matrix to file...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1669. "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36mweisfeilerlehmankernel\u001b[0;34m(height, base_kernel, *args)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_weisfeilerlehmankernel_do\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  1670. "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py\u001b[0m in \u001b[0;36m_weisfeilerlehmankernel_do\u001b[0;34m(G1, G2, height)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# calculate kernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# change your base kernel here (and one more before)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;31m# get label sets of both graphs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1671. "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spkernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me2\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cost'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m 
\u001b[0;34m(\u001b[0m\u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0me2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  1672. "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
  1673. ]
  1674. }
  1675. ],
  1676. "source": [
  1677. "# Author: Elisabetta Ghisu\n",
  1678. "# test of WL subtree kernel\n",
  1679. "\n",
  1680. "\"\"\"\n",
  1681. "- This script take as input a kernel matrix\n",
  1682. "and returns the classification or regression performance\n",
  1683. "- The kernel matrix can be calculated using any of the graph kernels approaches\n",
  1684. "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n",
  1685. "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n",
  1686. "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n",
  1687. "provide the corresponding performance on the test set. If more than one split is performed, the final results \n",
  1688. "correspond to the average of the performances on the test sets. \n",
  1689. "\n",
  1690. "@references\n",
  1691. " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n",
  1692. "\"\"\"\n",
  1693. "\n",
  1694. "print(__doc__)\n",
  1695. "\n",
  1696. "import sys\n",
  1697. "import os\n",
  1698. "import pathlib\n",
  1699. "sys.path.insert(0, \"../\")\n",
  1700. "from tabulate import tabulate\n",
  1701. "\n",
  1702. "import random\n",
  1703. "import numpy as np\n",
  1704. "import matplotlib.pyplot as plt\n",
  1705. "\n",
  1706. "from sklearn.kernel_ridge import KernelRidge # 0.17\n",
  1707. "from sklearn.metrics import accuracy_score, mean_squared_error\n",
  1708. "from sklearn import svm\n",
  1709. "\n",
  1710. "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n",
  1711. "from pygraph.utils.graphfiles import loadDataset\n",
  1712. "\n",
  1713. "val_means_height = []\n",
  1714. "val_stds_height = []\n",
  1715. "test_means_height = []\n",
  1716. "test_stds_height = []\n",
  1717. "\n",
  1718. "\n",
  1719. "for height in np.linspace(0, 10, 11):\n",
  1720. " print('\\n --- calculating kernel matrix when subtree height = %d ---' % height)\n",
  1721. "\n",
  1722. " print('\\n Loading dataset from file...')\n",
  1723. " dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
  1724. " y = np.array(y)\n",
  1725. " print(y)\n",
  1726. "\n",
  1727. " # setup the parameters\n",
  1728. " model_type = 'regression' # Regression or classification problem\n",
  1729. " print('\\n --- This is a %s problem ---' % model_type)\n",
  1730. "\n",
  1731. " datasize = len(dataset)\n",
  1732. " trials = 100 # Trials for hyperparameters random search\n",
  1733. " splits = 10 # Number of splits of the data\n",
  1734. " alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n",
  1735. " C_grid = np.logspace(-10, 10, num = trials, base = 10)\n",
  1736. " random.seed(20) # Set the seed for uniform parameter distribution\n",
  1737. "\n",
  1738. " # set the output path\n",
  1739. " kernel_file_path = 'kernelmatrices_weisfeilerlehman_acyclic/'\n",
  1740. " if not os.path.exists(kernel_file_path):\n",
  1741. " os.makedirs(kernel_file_path)\n",
  1742. "\n",
  1743. "\n",
  1744. " \"\"\"\n",
  1745. " - Here starts the main program\n",
  1746. " - First we permute the data, then for each split we evaluate corresponding performances\n",
  1747. " - In the end, the performances are averaged over the test sets\n",
  1748. " \"\"\"\n",
  1749. "\n",
  1750. " # save kernel matrices to files / read kernel matrices from files\n",
  1751. " kernel_file = kernel_file_path + 'km.ds'\n",
  1752. " path = pathlib.Path(kernel_file)\n",
  1753. " # get train set kernel matrix\n",
  1754. " if path.is_file():\n",
  1755. " print('\\n Loading the kernel matrix from file...')\n",
  1756. " Kmatrix = np.loadtxt(kernel_file)\n",
  1757. " print(Kmatrix)\n",
  1758. " else:\n",
  1759. " print('\\n Calculating kernel matrix, this could take a while...')\n",
  1760. " Kmatrix = weisfeilerlehmankernel(dataset, node_label = 'atom', height = int(height), base_kernel = 'sp')\n",
  1761. " print(Kmatrix)\n",
  1762. " print('\\n Saving kernel matrix to file...')\n",
  1763. "# np.savetxt(kernel_file, Kmatrix)\n",
  1764. "\n",
  1765. " # Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n",
  1766. " val_split = []\n",
  1767. " test_split = []\n",
  1768. "\n",
  1769. " # For each split of the data\n",
  1770. " for j in range(10, 10 + splits):\n",
  1771. " # print('\\n Starting split %d...' % j)\n",
  1772. "\n",
  1773. " # Set the random seed for data permutation\n",
  1774. " random_state = int(j)\n",
  1775. " np.random.seed(random_state)\n",
  1776. " idx_perm = np.random.permutation(datasize)\n",
  1777. " # print(idx_perm)\n",
  1778. "\n",
  1779. " # Permute the data\n",
  1780. " y_perm = y[idx_perm] # targets permutation\n",
  1781. " # print(y_perm)\n",
  1782. " Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n",
  1783. " # print(Kmatrix_perm)\n",
  1784. " Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n",
  1785. "\n",
  1786. " # Set the training, validation and test\n",
  1787. " # Note: the percentage can be set up by the user\n",
  1788. " num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n",
  1789. " num_test = datasize - num_train_val # 10% (of entire dataset) for test\n",
  1790. " num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n",
  1791. " num_val = num_train_val - num_train # 10% (of train + val) for validation\n",
  1792. "\n",
  1793. " # Split the kernel matrix\n",
  1794. " Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n",
  1795. " Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n",
  1796. " Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n",
  1797. "\n",
  1798. " # Split the targets\n",
  1799. " y_train = y_perm[0:num_train]\n",
  1800. "\n",
  1801. " # Normalization step (for real valued targets only)\n",
  1802. " if model_type == 'regression':\n",
  1803. " # print('\\n Normalizing output y...')\n",
  1804. " y_train_mean = np.mean(y_train)\n",
  1805. " y_train_std = np.std(y_train)\n",
  1806. " y_train = (y_train - y_train_mean) / float(y_train_std)\n",
  1807. " # print(y)\n",
  1808. "\n",
  1809. " y_val = y_perm[num_train:(num_train + num_val)]\n",
  1810. " y_test = y_perm[(num_train + num_val):datasize]\n",
  1811. "\n",
  1812. " # Record the performance for each parameter trial respectively on validation and test set\n",
  1813. " perf_all_train = []\n",
  1814. " perf_all_test = []\n",
  1815. "\n",
  1816. " # For each parameter trial\n",
  1817. " for i in range(trials):\n",
  1818. " # For regression use the Kernel Ridge method\n",
  1819. " if model_type == 'regression':\n",
  1820. " # print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n",
  1821. "\n",
  1822. " # Fit the kernel ridge model\n",
  1823. " KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n",
  1824. " # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n",
  1825. " KR.fit(Kmatrix_train, y_train)\n",
  1826. "\n",
  1827. " # predict on the validation and test set\n",
  1828. " y_pred = KR.predict(Kmatrix_val)\n",
  1829. " y_pred_test = KR.predict(Kmatrix_test)\n",
  1830. " # print(y_pred)\n",
  1831. "\n",
  1832. " # adjust prediction: needed because the training targets have been normalized\n",
  1833. " y_pred = y_pred * float(y_train_std) + y_train_mean\n",
  1834. " # print(y_pred)\n",
  1835. " y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n",
  1836. " # print(y_pred_test)\n",
  1837. "\n",
  1838. " # root mean squared error on validation\n",
  1839. " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n",
  1840. " perf_all_val.append(rmse)\n",
  1841. "\n",
  1842. " # root mean squared error in test \n",
  1843. " rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n",
  1844. " perf_all_test.append(rmse_test)\n",
  1845. "\n",
  1846. " # print('The performance on the validation set is: %3f' % rmse)\n",
  1847. " # print('The performance on the test set is: %3f' % rmse_test)\n",
  1848. "\n",
  1849. " # --- FIND THE OPTIMAL PARAMETERS --- #\n",
  1850. " # For regression: minimise the mean squared error\n",
  1851. " if model_type == 'regression':\n",
  1852. "\n",
  1853. " # get optimal parameter on validation (argmin mean squared error)\n",
  1854. " min_idx = np.argmin(perf_all_test)\n",
  1855. " alpha_opt = alpha_grid[min_idx]\n",
  1856. "\n",
  1857. " # performance corresponding to optimal parameter on val\n",
  1858. " perf_val_opt = perf_all_val[min_idx]\n",
  1859. "\n",
  1860. " # corresponding performance on test for the same parameter\n",
  1861. " perf_test_opt = perf_all_test[min_idx]\n",
  1862. "\n",
  1863. " # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n",
  1864. " # print('The best performance on the validation set is: %3f' % perf_val_opt)\n",
  1865. " # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n",
  1866. "\n",
  1867. " # append the best performance on validation\n",
  1868. " # at the current split\n",
  1869. " val_split.append(perf_val_opt)\n",
  1870. "\n",
  1871. " # append the corresponding performance on the test set\n",
  1872. " test_split.append(perf_test_opt)\n",
  1873. "\n",
  1874. " # average the results\n",
  1875. " # mean of the validation performances over the splits\n",
  1876. " val_mean = np.mean(np.asarray(val_split))\n",
  1877. " # std deviation of validation over the splits\n",
  1878. " val_std = np.std(np.asarray(val_split))\n",
  1879. "\n",
  1880. " # mean of the test performances over the splits\n",
  1881. " test_mean = np.mean(np.asarray(test_split))\n",
  1882. " # std deviation of the test over the splits\n",
  1883. " test_std = np.std(np.asarray(test_split))\n",
  1884. "\n",
  1885. " print('\\n Mean performance on val set: %3f' % val_mean)\n",
  1886. " print('With standard deviation: %3f' % val_std)\n",
  1887. " print('\\n Mean performance on test set: %3f' % test_mean)\n",
  1888. " print('With standard deviation: %3f' % test_std)\n",
  1889. " \n",
  1890. " val_means_height.append(val_mean)\n",
  1891. " val_stds_height.append(val_std)\n",
  1892. " test_means_height.append(test_mean)\n",
  1893. " test_stds_height.append(test_std)\n",
  1894. " \n",
  1895. "print('\\n') \n",
  1896. "print(tabulate({'height': np.linspace(1, 12, 11), 'RMSE': test_means_height, 'std': test_stds_height}, headers='keys'))"
  1897. ]
  1898. },
  1899. {
  1900. "cell_type": "code",
  1901. "execution_count": 15,
  1902. "metadata": {},
  1903. "outputs": [
  1904. {
  1905. "data": {
  1906. "text/plain": [
  1907. "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}"
  1908. ]
  1909. },
  1910. "execution_count": 15,
  1911. "metadata": {},
  1912. "output_type": "execute_result"
  1913. }
  1914. ],
  1915. "source": [
  1916. "# a = [0, 1, 3, 2]\n",
  1917. "# b = [3, 2, 1, 0]\n",
  1918. "# print(1 if a == b else 0)\n",
  1919. "\n",
  1920. "# max(1 ,2)\n",
  1921. "\n",
  1922. "# x = [ 'r', 'a', 's' ]\n",
  1923. "# x.sort()\n",
  1924. "# print(x)\n",
  1925. "\n",
  1926. "# def test1(*args, base = 'subtree'):\n",
  1927. "# if base == 'subtree':\n",
  1928. "# print('subtree')\n",
  1929. "# elif base == 'edge':\n",
  1930. "# print('edge')\n",
  1931. "# else:\n",
  1932. "# print('sp')\n",
  1933. "\n",
  1934. "# # function parameter usage test\n",
  1935. "# test1('hello', 'hi', base = 'edge')\n",
  1936. "\n",
  1937. "# # python matrix calculation speed test\n",
  1938. "# import numpy as np\n",
  1939. "# import time\n",
  1940. "\n",
  1941. "# size = 100\n",
  1942. "# m1 = np.random.random((size, size))\n",
  1943. "# m2 = np.random.random((size, size))\n",
  1944. "# itr = 1\n",
  1945. "\n",
  1946. "# start_time = time.time()\n",
  1947. "# for i in range(itr):\n",
  1948. "# np.dot(m1, m2)\n",
  1949. "# print(time.time() - start_time)\n",
  1950. "\n",
  1951. "# start_time = time.time()\n",
  1952. "# for j in range(itr):\n",
  1953. "# result = np.zeros((size, size))\n",
  1954. "# for i1 in range(size):\n",
  1955. "# for i2 in range(size):\n",
  1956. "# for i3 in range(size):\n",
  1957. "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n",
  1958. "# print(time.time() - start_time)\n",
  1959. "\n",
  1960. "# start_time = time.time()\n",
  1961. "# for i in range(itr):\n",
  1962. "# print(np.dot(m1, m2))\n",
  1963. "# print(time.time() - start_time)\n",
  1964. "\n",
  1965. "# start_time = time.time()\n",
  1966. "# for j in range(itr):\n",
  1967. "# result = np.zeros((size, size))\n",
  1968. "# for i1 in range(size):\n",
  1969. "# for i2 in range(size):\n",
  1970. "# for i3 in range(size):\n",
  1971. "# result[i1][i2] += m1[i1][i3] * m2[i3][i2]\n",
  1972. "# print(result)\n",
  1973. "# print(time.time() - start_time)\n",
  1974. "\n",
  1975. "# help(np.sum)\n",
  1976. "\n",
  1977. "# test dict\n",
  1978. "import sys\n",
  1979. "from collections import Counter\n",
  1980. "import networkx as nx\n",
  1981. "sys.path.insert(0, \"../\")\n",
  1982. "from pygraph.utils.graphfiles import loadDataset\n",
  1983. "from pygraph.kernels.spkernel import spkernel\n",
  1984. "\n",
  1985. "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
  1986. "G1 = dataset[15]\n",
  1987. "nx.get_node_attributes(G1, 'label')\n",
  1988. "listhqhq = list(nx.get_node_attributes(G1, 'label').values())\n",
  1989. "dicthaha = dict(Counter(listhqhq))\n",
  1990. "len(dicthaha)"
  1991. ]
  1992. }
  1993. ],
  1994. "metadata": {
  1995. "kernelspec": {
  1996. "display_name": "Python 3",
  1997. "language": "python",
  1998. "name": "python3"
  1999. },
  2000. "language_info": {
  2001. "codemirror_mode": {
  2002. "name": "ipython",
  2003. "version": 3
  2004. },
  2005. "file_extension": ".py",
  2006. "mimetype": "text/x-python",
  2007. "name": "python",
  2008. "nbconvert_exporter": "python",
  2009. "pygments_lexer": "ipython3",
  2010. "version": "3.5.2"
  2011. }
  2012. },
  2013. "nbformat": 4,
  2014. "nbformat_minor": 2
  2015. }

A Python package for graph kernels, graph edit distances and graph pre-image problem.