{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " --- This is a regression problem ---\n", "\n", "\n", " #--- calculating kernel matrix when height = 0.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3646550178527832 seconds ---\n", "[[ 5. 6. 4. ... 20. 20. 20.]\n", " [ 6. 8. 4. ... 20. 20. 20.]\n", " [ 4. 4. 5. ... 21. 21. 21.]\n", " ...\n", " [ 20. 20. 21. ... 101. 101. 101.]\n", " [ 20. 20. 21. ... 101. 101. 101.]\n", " [ 20. 20. 21. ... 101. 101. 101.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 664.77it/s]\n", " Mean performance on train set: 17.681582\n", "With standard deviation: 0.713183\n", "\n", " Mean performance on test set: 15.685879\n", "With standard deviation: 4.139197\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 681.36it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 1.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.7535510063171387 seconds ---\n", "[[ 10. 10. 4. ... 20. 20. 20.]\n", " [ 10. 16. 4. ... 20. 20. 20.]\n", " [ 4. 4. 10. ... 22. 22. 24.]\n", " ...\n", " [ 20. 20. 22. ... 130. 130. 122.]\n", " [ 20. 20. 22. ... 130. 130. 122.]\n", " [ 20. 20. 24. ... 122. 122. 154.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 94%|█████████▍| 945/1000 [00:01<00:00, 713.00it/s]\n", " Mean performance on train set: 6.270014\n", "With standard deviation: 0.654734\n", "\n", " Mean performance on test set: 7.550458\n", "With standard deviation: 2.331786\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 719.46it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 2.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.3278343677520752 seconds ---\n", "[[ 15. 10. 4. ... 20. 20. 20.]\n", " [ 10. 24. 4. ... 20. 20. 20.]\n", " [ 4. 4. 15. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 159. 151. 124.]\n", " [ 20. 20. 22. ... 151. 153. 124.]\n", " [ 20. 20. 26. ... 124. 124. 185.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▍| 949/1000 [00:01<00:00, 736.38it/s]\n", " Mean performance on train set: 4.450682\n", "With standard deviation: 0.882129\n", "\n", " Mean performance on test set: 9.728466\n", "With standard deviation: 2.057669\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 709.22it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 3.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.7653727531433105 seconds ---\n", "[[ 20. 10. 4. ... 20. 20. 20.]\n", " [ 10. 32. 4. ... 20. 20. 20.]\n", " [ 4. 4. 20. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 188. 159. 124.]\n", " [ 20. 20. 22. ... 159. 168. 124.]\n", " [ 20. 20. 26. ... 124. 124. 
202.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 96%|█████████▌| 959/1000 [00:01<00:00, 724.60it/s]\n", " Mean performance on train set: 2.270586\n", "With standard deviation: 0.481516\n", "\n", " Mean performance on test set: 11.296110\n", "With standard deviation: 2.799944\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 670.29it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 4.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.2821996212005615 seconds ---\n", "[[ 25. 10. 4. ... 20. 20. 20.]\n", " [ 10. 40. 4. ... 20. 20. 20.]\n", " [ 4. 4. 25. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 217. 159. 124.]\n", " [ 20. 20. 22. ... 159. 183. 124.]\n", " [ 20. 20. 26. ... 124. 124. 213.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 709.28it/s]\n", " Mean performance on train set: 1.074035\n", "With standard deviation: 0.637823\n", "\n", " Mean performance on test set: 12.808303\n", "With standard deviation: 3.446939\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 646.12it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 5.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.706934928894043 seconds ---\n", "[[ 30. 10. 4. ... 20. 20. 20.]\n", " [ 10. 48. 4. ... 20. 20. 20.]\n", " [ 4. 4. 30. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 246. 159. 124.]\n", " [ 20. 20. 22. ... 159. 198. 124.]\n", " [ 20. 20. 26. ... 124. 124. 224.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▌| 953/1000 [00:01<00:00, 553.49it/s]\n", " Mean performance on train set: 0.700602\n", "With standard deviation: 0.572640\n", "\n", " Mean performance on test set: 14.017923\n", "With standard deviation: 3.675042\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 621.01it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 6.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.1140964031219482 seconds ---\n", "[[ 35. 10. 4. ... 20. 20. 20.]\n", " [ 10. 56. 4. ... 20. 20. 20.]\n", " [ 4. 4. 35. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 275. 159. 124.]\n", " [ 20. 20. 22. ... 159. 213. 124.]\n", " [ 20. 20. 26. ... 124. 124. 235.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 100%|█████████▉| 997/1000 [00:01<00:00, 595.50it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 14.918434\n", "With standard deviation: 3.805352\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 586.05it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 7.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.5894455909729004 seconds ---\n", "[[ 40. 10. 4. ... 20. 20. 20.]\n", " [ 10. 64. 4. ... 20. 20. 20.]\n", " [ 4. 4. 40. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 304. 
159. 124.]\n", " [ 20. 20. 22. ... 159. 228. 124.]\n", " [ 20. 20. 26. ... 124. 124. 246.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 99%|█████████▉| 991/1000 [00:01<00:00, 663.55it/s]\n", " Mean performance on train set: 0.691516\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 15.629476\n", "With standard deviation: 3.865387\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 627.59it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 8.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.081295967102051 seconds ---\n", "[[ 45. 10. 4. ... 20. 20. 20.]\n", " [ 10. 72. 4. ... 20. 20. 20.]\n", " [ 4. 4. 45. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 333. 159. 124.]\n", " [ 20. 20. 22. ... 159. 243. 124.]\n", " [ 20. 20. 26. ... 124. 124. 257.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 96%|█████████▌| 961/1000 [00:01<00:00, 601.33it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 16.214369\n", "With standard deviation: 3.928756\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 603.90it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 9.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.497286796569824 seconds ---\n", "[[ 50. 10. 4. ... 20. 20. 20.]\n", " [ 10. 80. 4. ... 20. 20. 20.]\n", " [ 4. 4. 50. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 362. 159. 124.]\n", " [ 20. 20. 22. ... 159. 258. 124.]\n", " [ 20. 20. 26. ... 124. 124. 268.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 93%|█████████▎| 931/1000 [00:01<00:00, 511.55it/s]\n", " Mean performance on train set: 0.691515\n", "With standard deviation: 0.564620\n", "\n", " Mean performance on test set: 16.725744\n", "With standard deviation: 3.993095\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 550.66it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.984841585159302 seconds ---\n", "[[ 55. 10. 4. ... 20. 20. 20.]\n", " [ 10. 88. 4. ... 20. 20. 20.]\n", " [ 4. 4. 55. ... 22. 22. 26.]\n", " ...\n", " [ 20. 20. 22. ... 391. 159. 124.]\n", " [ 20. 20. 22. ... 159. 273. 124.]\n", " [ 20. 20. 26. ... 124. 124. 
279.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 94%|█████████▍| 942/1000 [00:01<00:00, 708.78it/s]\n", " Mean performance on train set: 0.691516\n", "With standard deviation: 0.564621\n", "\n", " Mean performance on test set: 17.186401\n", "With standard deviation: 4.056724\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 711.43it/s]\n", "\n", "\n", " height rmse_test std_test rmse_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- --------\n", " 0 15.6859 4.1392 17.6816 0.713183 0.364655\n", " 1 7.55046 2.33179 6.27001 0.654734 0.753551\n", " 2 9.72847 2.05767 4.45068 0.882129 1.32783\n", " 3 11.2961 2.79994 2.27059 0.481516 1.76537\n", " 4 12.8083 3.44694 1.07403 0.637823 2.2822\n", " 5 14.0179 3.67504 0.700602 0.57264 2.70693\n", " 6 14.9184 3.80535 0.691515 0.56462 3.1141\n", " 7 15.6295 3.86539 0.691516 0.56462 3.58945\n", " 8 16.2144 3.92876 0.691515 0.56462 4.0813\n", " 9 16.7257 3.9931 0.691515 0.56462 4.49729\n", " 10 17.1864 4.05672 0.691516 0.564621 4.98484\n" ] } ], "source": [ "# wl subtree kernel\n", "%load_ext line_profiler\n", "\n", "import numpy as np\n", "import sys\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.utils import kernel_train_test\n", "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", "\n", "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", "\n", "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type')\n", "\n", "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", "\n", "# %lprun -f _wl_subtreekernel_do \\\n", "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " --- This is a regression problem ---\n", "\n", "\n", " #--- calculating kernel matrix when height = 0.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 13.504083633422852 seconds ---\n", "[[ 3. 1. 3. ... 1. 1. 1.]\n", " [ 1. 6. 1. ... 0. 0. 3.]\n", " [ 3. 1. 3. ... 1. 1. 1.]\n", " ...\n", " [ 1. 0. 1. ... 55. 21. 7.]\n", " [ 1. 0. 1. ... 21. 55. 7.]\n", " [ 1. 3. 1. ... 7. 7. 55.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 773.79it/s]\n", " Mean performance on train set: 28.360361\n", "With standard deviation: 1.357183\n", "\n", " Mean performance on test set: 35.191954\n", "With standard deviation: 4.495767\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 743.82it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 1.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 26.82917618751526 seconds ---\n", "[[ 6. 2. 6. ... 2. 2. 2.]\n", " [ 2. 12. 2. ... 0. 0. 6.]\n", " [ 6. 2. 6. ... 2. 2. 2.]\n", " ...\n", " [ 2. 0. 2. ... 110. 42. 14.]\n", " [ 2. 0. 2. ... 42. 110. 14.]\n", " [ 2. 6. 2. ... 14. 14. 
110.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 751.78it/s]\n", " Mean performance on train set: 27.933534\n", "With standard deviation: 1.448359\n", "\n", " Mean performance on test set: 35.180815\n", "With standard deviation: 4.500453\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 744.44it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 2.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 40.235626220703125 seconds ---\n", "[[ 9. 3. 9. ... 3. 3. 3.]\n", " [ 3. 18. 3. ... 0. 0. 9.]\n", " [ 9. 3. 9. ... 3. 3. 3.]\n", " ...\n", " [ 3. 0. 3. ... 165. 63. 21.]\n", " [ 3. 0. 3. ... 63. 165. 21.]\n", " [ 3. 9. 3. ... 21. 21. 165.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 694.10it/s]\n", " Mean performance on train set: 28.111311\n", "With standard deviation: 1.508915\n", "\n", " Mean performance on test set: 35.163150\n", "With standard deviation: 4.502054\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 695.02it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 3.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 54.67040753364563 seconds ---\n", "[[ 12. 4. 12. ... 4. 4. 4.]\n", " [ 4. 24. 4. ... 0. 0. 12.]\n", " [ 12. 4. 12. ... 4. 4. 4.]\n", " ...\n", " [ 4. 0. 4. ... 220. 84. 28.]\n", " [ 4. 0. 4. ... 84. 220. 28.]\n", " [ 4. 12. 4. ... 28. 28. 220.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 748.03it/s]\n", " Mean performance on train set: 28.390274\n", "With standard deviation: 1.365711\n", "\n", " Mean performance on test set: 35.194634\n", "With standard deviation: 4.498007\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 726.68it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 4.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 67.15217232704163 seconds ---\n", "[[ 15. 5. 15. ... 5. 5. 5.]\n", " [ 5. 30. 5. ... 0. 0. 15.]\n", " [ 15. 5. 15. ... 5. 5. 5.]\n", " ...\n", " [ 5. 0. 5. ... 275. 105. 35.]\n", " [ 5. 0. 5. ... 105. 275. 35.]\n", " [ 5. 15. 5. ... 35. 35. 275.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▌| 950/1000 [00:01<00:00, 737.07it/s]\n", " Mean performance on train set: 27.974611\n", "With standard deviation: 1.462223\n", "\n", " Mean performance on test set: 35.175314\n", "With standard deviation: 4.501113\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 719.71it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 5.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 80.08806300163269 seconds ---\n", "[[ 18. 6. 18. ... 6. 6. 6.]\n", " [ 6. 36. 6. ... 0. 0. 18.]\n", " [ 18. 6. 18. ... 6. 6. 6.]\n", " ...\n", " [ 6. 0. 6. ... 330. 126. 42.]\n", " [ 6. 0. 6. ... 126. 330. 42.]\n", " [ 6. 18. 6. ... 42. 42. 
330.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 735.71it/s]\n", " Mean performance on train set: 28.018415\n", "With standard deviation: 1.455644\n", "\n", " Mean performance on test set: 35.199713\n", "With standard deviation: 4.507104\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 738.55it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 6.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 92.19254112243652 seconds ---\n", "[[ 21. 7. 21. ... 7. 7. 7.]\n", " [ 7. 42. 7. ... 0. 0. 21.]\n", " [ 21. 7. 21. ... 7. 7. 7.]\n", " ...\n", " [ 7. 0. 7. ... 385. 147. 49.]\n", " [ 7. 0. 7. ... 147. 385. 49.]\n", " [ 7. 21. 7. ... 49. 49. 385.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 721.42it/s]\n", " Mean performance on train set: 28.373079\n", "With standard deviation: 1.600565\n", "\n", " Mean performance on test set: 35.164471\n", "With standard deviation: 4.498487\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 727.58it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 7.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 105.81170415878296 seconds ---\n", "[[ 24. 8. 24. ... 8. 8. 8.]\n", " [ 8. 48. 8. ... 0. 0. 24.]\n", " [ 24. 8. 24. ... 8. 8. 8.]\n", " ...\n", " [ 8. 0. 8. ... 440. 168. 56.]\n", " [ 8. 0. 8. ... 168. 440. 56.]\n", " [ 8. 24. 8. ... 56. 56. 440.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 97%|█████████▋| 968/1000 [00:01<00:00, 739.67it/s]\n", " Mean performance on train set: 27.960421\n", "With standard deviation: 1.457425\n", "\n", " Mean performance on test set: 35.177115\n", "With standard deviation: 4.500904\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 733.61it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 8.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 119.0216612815857 seconds ---\n", "[[ 27. 9. 27. ... 9. 9. 9.]\n", " [ 9. 54. 9. ... 0. 0. 27.]\n", " [ 27. 9. 27. ... 9. 9. 9.]\n", " ...\n", " [ 9. 0. 9. ... 495. 189. 63.]\n", " [ 9. 0. 9. ... 189. 495. 63.]\n", " [ 9. 27. 9. ... 63. 63. 495.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 93%|█████████▎| 931/1000 [00:01<00:00, 752.10it/s]\n", " Mean performance on train set: 28.199059\n", "With standard deviation: 1.514897\n", "\n", " Mean performance on test set: 35.196848\n", "With standard deviation: 4.505256\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 768.54it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 9.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 131.22810459136963 seconds ---\n", "[[ 30. 10. 30. ... 10. 10. 10.]\n", " [ 10. 60. 10. ... 0. 0. 30.]\n", " [ 30. 10. 30. ... 10. 10. 10.]\n", " ...\n", " [ 10. 0. 10. ... 550. 210. 70.]\n", " [ 10. 0. 10. ... 210. 550. 70.]\n", " [ 10. 30. 10. ... 70. 70. 
550.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 93%|█████████▎| 932/1000 [00:01<00:00, 763.55it/s]\n", " Mean performance on train set: 28.266520\n", "With standard deviation: 1.307686\n", "\n", " Mean performance on test set: 35.195635\n", "With standard deviation: 4.501972\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 764.12it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 144.96362161636353 seconds ---\n", "[[ 33. 11. 33. ... 11. 11. 11.]\n", " [ 11. 66. 11. ... 0. 0. 33.]\n", " [ 33. 11. 33. ... 11. 11. 11.]\n", " ...\n", " [ 11. 0. 11. ... 605. 231. 77.]\n", " [ 11. 0. 11. ... 231. 605. 77.]\n", " [ 11. 33. 11. ... 77. 77. 605.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 100%|█████████▉| 996/1000 [00:01<00:00, 820.73it/s]\n", " Mean performance on train set: 28.416280\n", "With standard deviation: 1.615957\n", "\n", " Mean performance on test set: 35.167588\n", "With standard deviation: 4.497227\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 822.53it/s]\n", "\n", "\n", " height rmse_test std_test rmse_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- --------\n", " 0 35.192 4.49577 28.3604 1.35718 13.5041\n", " 1 35.1808 4.50045 27.9335 1.44836 26.8292\n", " 2 35.1632 4.50205 28.1113 1.50891 40.2356\n", " 3 35.1946 4.49801 28.3903 1.36571 54.6704\n", " 4 35.1753 4.50111 27.9746 1.46222 67.1522\n", " 5 35.1997 4.5071 28.0184 1.45564 80.0881\n", " 6 35.1645 4.49849 28.3731 1.60057 92.1925\n", " 7 35.1771 4.5009 27.9604 1.45742 105.812\n", " 8 35.1968 4.50526 28.1991 1.5149 119.022\n", " 9 35.1956 4.50197 28.2665 1.30769 131.228\n", " 10 35.1676 4.49723 28.4163 1.61596 144.964\n" ] } ], "source": [ "# WL sp kernel\n", "%load_ext line_profiler\n", "\n", "import numpy as np\n", "import sys\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.utils import kernel_train_test\n", "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", "\n", "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", "\n", "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'sp')\n", "\n", "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", "\n", "# %lprun -f _wl_subtreekernel_do \\\n", "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The line_profiler extension is already loaded. 
To reload it, use:\n", " %reload_ext line_profiler\n", "\n", " --- This is a regression problem ---\n", "\n", "\n", " #--- calculating kernel matrix when height = 0.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 0.8530018329620361 seconds ---\n", "[[ 2. 1. 2. ... 0. 0. 1.]\n", " [ 1. 3. 1. ... 0. 0. 2.]\n", " [ 2. 1. 2. ... 0. 0. 1.]\n", " ...\n", " [ 0. 0. 0. ... 10. 7. 0.]\n", " [ 0. 0. 0. ... 7. 10. 1.]\n", " [ 1. 2. 1. ... 0. 1. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▍| 947/1000 [00:01<00:00, 719.29it/s]\n", " Mean performance on train set: 29.997498\n", "With standard deviation: 0.902340\n", "\n", " Mean performance on test set: 33.407740\n", "With standard deviation: 4.732717\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 653.54it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 1.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 1.717505931854248 seconds ---\n", "[[ 4. 2. 4. ... 0. 0. 2.]\n", " [ 2. 6. 2. ... 0. 0. 4.]\n", " [ 4. 2. 4. ... 0. 0. 2.]\n", " ...\n", " [ 0. 0. 0. ... 20. 14. 0.]\n", " [ 0. 0. 0. ... 14. 20. 2.]\n", " [ 2. 4. 2. ... 0. 2. 20.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 96%|█████████▌| 956/1000 [00:01<00:00, 721.27it/s]\n", " Mean performance on train set: 30.160338\n", "With standard deviation: 1.094235\n", "\n", " Mean performance on test set: 33.423458\n", "With standard deviation: 4.721311\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 723.53it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 2.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 2.6603214740753174 seconds ---\n", "[[ 6. 3. 6. ... 0. 0. 3.]\n", " [ 3. 9. 3. ... 0. 0. 6.]\n", " [ 6. 3. 6. ... 0. 0. 3.]\n", " ...\n", " [ 0. 0. 0. ... 30. 21. 0.]\n", " [ 0. 0. 0. ... 21. 30. 3.]\n", " [ 3. 6. 3. ... 0. 3. 30.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 94%|█████████▍| 944/1000 [00:01<00:00, 650.98it/s]\n", " Mean performance on train set: 29.928570\n", "With standard deviation: 0.787941\n", "\n", " Mean performance on test set: 33.433014\n", "With standard deviation: 4.724408\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 688.71it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 3.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 3.477631092071533 seconds ---\n", "[[ 8. 4. 8. ... 0. 0. 4.]\n", " [ 4. 12. 4. ... 0. 0. 8.]\n", " [ 8. 4. 8. ... 0. 0. 4.]\n", " ...\n", " [ 0. 0. 0. ... 40. 28. 0.]\n", " [ 0. 0. 0. ... 28. 40. 4.]\n", " [ 4. 8. 4. ... 0. 4. 
40.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 725.15it/s]\n", " Mean performance on train set: 30.011409\n", "With standard deviation: 0.909674\n", "\n", " Mean performance on test set: 33.407319\n", "With standard deviation: 4.732434\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 720.71it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 4.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 4.5436692237854 seconds ---\n", "[[10. 5. 10. ... 0. 0. 5.]\n", " [ 5. 15. 5. ... 0. 0. 10.]\n", " [10. 5. 10. ... 0. 0. 5.]\n", " ...\n", " [ 0. 0. 0. ... 50. 35. 0.]\n", " [ 0. 0. 0. ... 35. 50. 5.]\n", " [ 5. 10. 5. ... 0. 5. 50.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 568.04it/s]\n", " Mean performance on train set: 30.184162\n", "With standard deviation: 1.108902\n", "\n", " Mean performance on test set: 33.425625\n", "With standard deviation: 4.721660\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 564.24it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 5.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 5.6617820262908936 seconds ---\n", "[[12. 6. 12. ... 0. 0. 6.]\n", " [ 6. 18. 6. ... 0. 0. 12.]\n", " [12. 6. 12. ... 0. 0. 6.]\n", " ...\n", " [ 0. 0. 0. ... 60. 42. 0.]\n", " [ 0. 0. 0. ... 42. 60. 6.]\n", " [ 6. 12. 6. ... 0. 6. 60.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 99%|█████████▉| 993/1000 [00:01<00:00, 519.25it/s]\n", " Mean performance on train set: 30.041068\n", "With standard deviation: 1.018451\n", "\n", " Mean performance on test set: 33.406717\n", "With standard deviation: 4.726409\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 548.91it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 6.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 6.148027420043945 seconds ---\n", "[[14. 7. 14. ... 0. 0. 7.]\n", " [ 7. 21. 7. ... 0. 0. 14.]\n", " [14. 7. 14. ... 0. 0. 7.]\n", " ...\n", " [ 0. 0. 0. ... 70. 49. 0.]\n", " [ 0. 0. 0. ... 49. 70. 7.]\n", " [ 7. 14. 7. ... 0. 7. 70.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 498.31it/s]\n", " Mean performance on train set: 29.905596\n", "With standard deviation: 0.782179\n", "\n", " Mean performance on test set: 33.418992\n", "With standard deviation: 4.730753\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 534.86it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 7.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 7.603543519973755 seconds ---\n", "[[16. 8. 16. ... 0. 0. 8.]\n", " [ 8. 24. 8. ... 0. 0. 16.]\n", " [16. 8. 16. ... 0. 0. 8.]\n", " ...\n", " [ 0. 0. 0. ... 80. 56. 0.]\n", " [ 0. 0. 0. ... 56. 80. 8.]\n", " [ 8. 16. 8. ... 0. 8. 
80.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▌| 953/1000 [00:01<00:00, 586.15it/s]\n", " Mean performance on train set: 30.175921\n", "With standard deviation: 1.103820\n", "\n", " Mean performance on test set: 33.424820\n", "With standard deviation: 4.721550\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 546.00it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 8.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 7.972221612930298 seconds ---\n", "[[18. 9. 18. ... 0. 0. 9.]\n", " [ 9. 27. 9. ... 0. 0. 18.]\n", " [18. 9. 18. ... 0. 0. 9.]\n", " ...\n", " [ 0. 0. 0. ... 90. 63. 0.]\n", " [ 0. 0. 0. ... 63. 90. 9.]\n", " [ 9. 18. 9. ... 0. 9. 90.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 490.30it/s]\n", " Mean performance on train set: 30.136537\n", "With standard deviation: 1.074854\n", "\n", " Mean performance on test set: 33.412196\n", "With standard deviation: 4.715539\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 536.66it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 9.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 9.070842504501343 seconds ---\n", "[[ 20. 10. 20. ... 0. 0. 10.]\n", " [ 10. 30. 10. ... 0. 0. 20.]\n", " [ 20. 10. 20. ... 0. 0. 10.]\n", " ...\n", " [ 0. 0. 0. ... 100. 70. 0.]\n", " [ 0. 0. 0. ... 70. 100. 10.]\n", " [ 10. 20. 10. ... 0. 10. 100.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 527.13it/s]\n", " Mean performance on train set: 30.032887\n", "With standard deviation: 0.921065\n", "\n", " Mean performance on test set: 33.407050\n", "With standard deviation: 4.731928\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 600.62it/s]\n", "\n", "\n", " #--- calculating kernel matrix when height = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "\n", " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 10.02536916732788 seconds ---\n", "[[ 22. 11. 22. ... 0. 0. 11.]\n", " [ 11. 33. 11. ... 0. 0. 22.]\n", " [ 22. 11. 22. ... 0. 0. 11.]\n", " ...\n", " [ 0. 0. 0. ... 110. 77. 0.]\n", " [ 0. 0. 0. ... 77. 110. 11.]\n", " [ 11. 22. 11. ... 0. 11. 
110.]]\n", "\n", " Starting calculate accuracy/rmse...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "calculate performance: 97%|█████████▋| 970/1000 [00:01<00:00, 694.38it/s]\n", " Mean performance on train set: 29.924232\n", "With standard deviation: 0.790843\n", "\n", " Mean performance on test set: 33.416469\n", "With standard deviation: 4.731694\n", "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 678.72it/s]\n", "\n", "\n", " height rmse_test std_test rmse_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- ---------\n", " 0 33.4077 4.73272 29.9975 0.90234 0.853002\n", " 1 33.4235 4.72131 30.1603 1.09423 1.71751\n", " 2 33.433 4.72441 29.9286 0.787941 2.66032\n", " 3 33.4073 4.73243 30.0114 0.909674 3.47763\n", " 4 33.4256 4.72166 30.1842 1.1089 4.54367\n", " 5 33.4067 4.72641 30.0411 1.01845 5.66178\n", " 6 33.419 4.73075 29.9056 0.782179 6.14803\n", " 7 33.4248 4.72155 30.1759 1.10382 7.60354\n", " 8 33.4122 4.71554 30.1365 1.07485 7.97222\n", " 9 33.4071 4.73193 30.0329 0.921065 9.07084\n", " 10 33.4165 4.73169 29.9242 0.790843 10.0254\n" ] } ], "source": [ "# WL edge kernel\n", "%load_ext line_profiler\n", "\n", "import numpy as np\n", "import sys\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.utils import kernel_train_test\n", "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n", "\n", "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n", "\n", "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'edge')\n", "\n", "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n", "\n", "# %lprun -f _wl_subtreekernel_do \\\n", "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n", "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# results\n", "\n", "# subtree with y normalization\n", " height RMSE_test std_test RMSE_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- --------\n", " 0 36.2108 7.33179 38.6059 1.57064 0.379475\n", " 1 9.00098 6.37145 6.76379 1.96568 0.844898\n", " 2 19.8113 4.04911 5.28757 1.81899 1.35308\n", " 3 25.0455 4.94276 2.3274 0.805733 1.81136\n", " 4 28.2255 6.5212 0.85156 0.423465 2.23098\n", " 5 30.6354 6.73647 3.35947 8.17561 2.71575\n", " 6 32.1027 6.85601 3.54105 8.71922 3.11459\n", " 7 32.9709 6.89606 6.94372 9.94045 3.55571\n", " 8 33.5112 6.90753 6.97339 9.76975 3.79657\n", " 9 33.8502 6.91427 11.8345 11.6213 4.41555\n", " 10 34.0963 6.93115 11.4257 11.2624 4.94888\n", "\n", "# subtree without y normalization\n", " height RMSE_test std_test RMSE_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- --------\n", " 0 15.6859 4.1392 17.6816 0.713183 0.360443\n", " 1 7.55046 2.33179 6.27001 0.654734 0.837389\n", " 2 9.72847 2.05767 4.45068 0.882129 1.25317\n", " 3 11.2961 2.79994 2.27059 0.481516 1.79971\n", " 4 12.8083 3.44694 1.07403 0.637823 2.35346\n", " 5 14.0179 3.67504 0.700602 0.57264 2.78285\n", " 6 14.9184 3.80535 0.691515 0.56462 3.20764\n", " 7 15.6295 3.86539 0.691516 0.56462 3.71648\n", " 8 16.2144 3.92876 0.691515 0.56462 3.99213\n", " 9 16.7257 3.9931 0.691515 
0.56462 4.26315\n", " 10 17.1864 4.05672 0.691516 0.564621 5.00918\n", " \n", "# sp\n", " height rmse_test std_test rmse_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- --------\n", " 0 35.192 4.49577 28.3604 1.35718 13.5041\n", " 1 35.1808 4.50045 27.9335 1.44836 26.8292\n", " 2 35.1632 4.50205 28.1113 1.50891 40.2356\n", " 3 35.1946 4.49801 28.3903 1.36571 54.6704\n", " 4 35.1753 4.50111 27.9746 1.46222 67.1522\n", " 5 35.1997 4.5071 28.0184 1.45564 80.0881\n", " 6 35.1645 4.49849 28.3731 1.60057 92.1925\n", " 7 35.1771 4.5009 27.9604 1.45742 105.812\n", " 8 35.1968 4.50526 28.1991 1.5149 119.022\n", " 9 35.1956 4.50197 28.2665 1.30769 131.228\n", " 10 35.1676 4.49723 28.4163 1.61596 144.964\n", " \n", "# path\n", " height rmse_test std_test rmse_train std_train k_time\n", "-------- ----------- ---------- ------------ ----------- ---------\n", " 0 33.4077 4.73272 29.9975 0.90234 0.853002\n", " 1 33.4235 4.72131 30.1603 1.09423 1.71751\n", " 2 33.433 4.72441 29.9286 0.787941 2.66032\n", " 3 33.4073 4.73243 30.0114 0.909674 3.47763\n", " 4 33.4256 4.72166 30.1842 1.1089 4.54367\n", " 5 33.4067 4.72641 30.0411 1.01845 5.66178\n", " 6 33.419 4.73075 29.9056 0.782179 6.14803\n", " 7 33.4248 4.72155 30.1759 1.10382 7.60354\n", " 8 33.4122 4.71554 30.1365 1.07485 7.97222\n", " 9 33.4071 4.73193 30.0329 0.921065 9.07084\n", " 10 33.4165 4.73169 29.9242 0.790843 10.0254" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'O', 'C'}\n", "{'O', 'C'}\n" ] }, { "ename": "TypeError", "evalue": "'int' object is not iterable", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabelset1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabelset2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(edge_weight, *args)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mGn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mG\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m]\u001b[0m \u001b[0;31m# get shortest path graphs of Gn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mGn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mG\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m]\u001b[0m \u001b[0;31m# get shortest path graphs of Gn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/utils.py\u001b[0m in \u001b[0;36mgetSPGraph\u001b[0;34m(G, edge_weight)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0mBorgwardt\u001b[0m \u001b[0mKM\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKriegel\u001b[0m \u001b[0mHP\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mShortest\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpath\u001b[0m \u001b[0mkernels\u001b[0m \u001b[0mon\u001b[0m \u001b[0mgraphs\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mInData\u001b[0m \u001b[0mMining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFifth\u001b[0m \u001b[0mIEEE\u001b[0m \u001b[0mInternational\u001b[0m \u001b[0mConference\u001b[0m \u001b[0mon\u001b[0m \u001b[0;36m2005\u001b[0m \u001b[0mNov\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mIEEE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \"\"\"\n\u001b[0;32m---> 37\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfloydTransformation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mfloydTransformation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'bond_type'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/utils.py\u001b[0m in \u001b[0;36mfloydTransformation\u001b[0;34m(G, edge_weight)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0mBorgwardt\u001b[0m \u001b[0mKM\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKriegel\u001b[0m \u001b[0mHP\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mShortest\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpath\u001b[0m \u001b[0mkernels\u001b[0m \u001b[0mon\u001b[0m \u001b[0mgraphs\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mInData\u001b[0m \u001b[0mMining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFifth\u001b[0m \u001b[0mIEEE\u001b[0m \u001b[0mInternational\u001b[0m \u001b[0mConference\u001b[0m \u001b[0mon\u001b[0m \u001b[0;36m2005\u001b[0m \u001b[0mNov\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mIEEE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \"\"\"\n\u001b[0;32m---> 58\u001b[0;31m \u001b[0mspMatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloyd_warshall_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_nodes_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnodes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/algorithms/shortest_paths/dense.py\u001b[0m in \u001b[0;36mfloyd_warshall_numpy\u001b[0;34m(G, nodelist, weight)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;31m# nonedges are not given the value 0 as well.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m A = nx.to_numpy_matrix(G, nodelist=nodelist, multigraph_weight=min,\n\u001b[0;32m---> 54\u001b[0;31m weight=weight, nonedge=np.inf)\n\u001b[0m\u001b[1;32m 55\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0mI\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0midentity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mto_numpy_matrix\u001b[0;34m(G, nodelist, dtype, order, multigraph_weight, weight, nonedge)\u001b[0m\n\u001b[1;32m 446\u001b[0m A = to_numpy_array(G, nodelist=nodelist, dtype=dtype, 
order=order,\n\u001b[1;32m 447\u001b[0m \u001b[0mmultigraph_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmultigraph_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 448\u001b[0;31m nonedge=nonedge)\n\u001b[0m\u001b[1;32m 449\u001b[0m \u001b[0mM\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mM\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mto_numpy_array\u001b[0;34m(G, nodelist, dtype, order, multigraph_weight, weight, nonedge)\u001b[0m\n\u001b[1;32m 1061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1062\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnodelist\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1063\u001b[0;31m \u001b[0mnodelist\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1064\u001b[0m \u001b[0mnodeset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodelist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1065\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodelist\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodeset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: 'int' object is not iterable" ] } ], "source": [ "import sys\n", "import networkx as nx\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.graphfiles import loadDataset\n", "from pygraph.kernels.spkernel import spkernel\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "def weisfeilerlehman_test(G):\n", " '''\n", " Weisfeiler-Lehman test of graph isomorphism.\n", " '''\n", "\n", " nx.draw_networkx(G)\n", " plt.show()\n", " nx.draw_networkx_labels(G, nx.spring_layout(G), labels = nx.get_node_attributes(G,'label'))\n", " print(G.nodes(data = True))\n", " \n", " set_multisets = []\n", " for node in G.nodes(data = True):\n", " # Multiset-label determination.\n", " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n", " # sorting each multiset\n", " multiset.sort()\n", " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n", " set_multisets.append(multiset)\n", " \n", " # label compression\n", "# set_multisets.sort() # this is unnecessary\n", " set_unique = list(set(set_multisets)) # set of unique multiset labels\n", " set_compressed = { value : str(set_unique.index(value)) for value in set_unique } # assign indices as the new labels\n", "# print(set_compressed)\n", "# print(set_multisets)\n", " \n", " # relabel nodes with multisets\n", " for node in G.nodes(data = True):\n", " node[1]['label'] = set_multisets[node[0]]\n", " print(' -> ')\n", " nx.draw_networkx(G)\n", " plt.show()\n", " print(G.nodes(data = True))\n", "\n", " \n", " # relabel nodes\n", " for node in G.nodes(data = 
True):\n", " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n", " \n", " print(' -> ')\n", " nx.draw_networkx(G)\n", " plt.show()\n", " print(G.nodes(data = True))\n", "\n", "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", "G1 = dataset[12]\n", "G2 = dataset[55]\n", "\n", "# init.\n", "kernel = 0 # init kernel\n", "num_nodes1 = G1.number_of_nodes()\n", "num_nodes2 = G2.number_of_nodes()\n", "\n", "# the first iteration.\n", "labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", "labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", "print(labelset1)\n", "print(labelset2)\n", "kernel += spkernel(G1, G2)\n", "print(kernel)\n", "\n", "\n", "\n", "for height in range(0, min(num_nodes1, num_nodes2)): #Q how to determine the upper bound of the height?\n", " if labelset1 != labelset2:\n", " break\n", " \n", " # Weisfeiler-Lehman test of graph isomorphism.\n", " weisfeilerlehman_test(G1)\n", " weisfeilerlehman_test(G2)\n", " \n", " # calculate kernel\n", " kernel += spkernel(G1, G2)\n", " \n", " # get label sets of both graphs\n", " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n", " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n", "# print(labelset1)\n", "# print(labelset2)\n", "\n", "print(kernel)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}\n", "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'C', 6: 'S', 7: 'S'}\n", "\n", " --- height = 0 --- \n", "\n", " --- for graph 0 --- \n", "\n", "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n", "all_labels_ori: {'C', 'O'}\n", "num_of_each_label: {'C': 5, 'O': 2}\n", "all_num_of_each_label: [{'C': 5, 'O': 2}]\n", "num_of_labels: 2\n", "all_labels_ori: {'C', 'O'}\n", "\n", " --- for graph 1 --- \n", "\n", "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n", "all_labels_ori: {'C', 'O', 'S'}\n", "num_of_each_label: {'C': 6, 'S': 2}\n", "all_num_of_each_label: [{'C': 5, 'O': 2}, {'C': 6, 'S': 2}]\n", "num_of_labels: 2\n", "all_labels_ori: {'C', 'O', 'S'}\n", "\n", " all_num_of_labels_occured: 3\n", "\n", " --- calculating kernel matrix ---\n", "\n", " labels: {'C', 'O'}\n", "vector1: [[5 2]]\n", "vector2: [[5 2]]\n", "Kmatrix: [[ 29. 0.]\n", " [ 0. 0.]]\n", "\n", " labels: {'C', 'O', 'S'}\n", "vector1: [[5 2 0]]\n", "vector2: [[6 0 2]]\n", "Kmatrix: [[ 29. 30.]\n", " [ 30. 0.]]\n", "\n", " labels: {'C', 'S'}\n", "vector1: [[6 2]]\n", "vector2: [[6 2]]\n", "Kmatrix: [[ 29. 30.]\n", " [ 30. 
40.]]\n", "\n", " --- height = 1 --- \n", "\n", " --- for graph 0 --- \n", "\n", "multiset: ['CC', 'CC', 'CCO', 'CCO', 'COO', 'OCC', 'OCC']\n", "set_unique: ['OCC', 'COO', 'CCO', 'CC']\n", "set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", "all_set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n", "num_of_labels_occured: 7\n", "\n", " compressed labels: {0: '7', 1: '7', 2: '6', 3: '6', 4: '5', 5: '4', 6: '4'}\n", "labels_comp: ['7', '7', '6', '6', '5', '4', '4']\n", "all_labels_ori: {'5', '4', '6', '7'}\n", "num_of_each_label: {'5': 1, '4': 2, '6': 2, '7': 2}\n", "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}]\n", "\n", " --- for graph 1 --- \n", "\n", "multiset: ['CC', 'CC', 'CC', 'CCS', 'CCS', 'CCSS', 'SCC', 'SCC']\n", "set_unique: ['SCC', 'CC', 'CCS', 'CCSS']\n", "set_compressed: {'SCC': '8', 'CC': '7', 'CCS': '9', 'CCSS': '10'}\n", "all_set_compressed: {'SCC': '8', 'COO': '5', 'CCS': '9', 'OCC': '4', 'CCO': '6', 'CCSS': '10', 'CC': '7'}\n", "num_of_labels_occured: 10\n", "\n", " compressed labels: {0: '7', 1: '7', 2: '7', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n", "labels_comp: ['7', '7', '7', '9', '9', '10', '8', '8']\n", "all_labels_ori: {'10', '4', '7', '9', '6', '5', '8'}\n", "num_of_each_label: {'10': 1, '9': 2, '7': 3, '8': 2}\n", "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}, {'10': 1, '9': 2, '7': 3, '8': 2}]\n", "\n", " all_num_of_labels_occured: 10\n", "\n", " --- calculating kernel matrix ---\n", "\n", " labels: {'5', '4', '6', '7'}\n", "vector1: [[1 2 2 2]]\n", "vector2: [[1 2 2 2]]\n", "\n", " labels: {'10', '4', '7', '9', '6', '5', '8'}\n", "vector1: [[0 2 2 0 2 1 0]]\n", "vector2: [[1 0 3 2 0 0 2]]\n", "\n", " labels: {'8', '10', '7', '9'}\n", "vector1: [[2 1 3 2]]\n", "vector2: [[2 1 3 2]]\n", "\n", " Kmatrix: [[ 42. 36.]\n", " [ 36. 
58.]]\n", "\n", " --- height = 2 --- \n", "\n", " --- for graph 0 --- \n", "\n", "multiset: ['76', '76', '647', '647', '544', '456', '456']\n", "set_unique: ['647', '76', '456', '544']\n", "set_compressed: {'647': '11', '76': '12', '544': '14', '456': '13'}\n", "all_set_compressed: {'647': '11', '76': '12', '456': '13', '544': '14'}\n", "num_of_labels_occured: 14\n", "\n", " compressed labels: {0: '12', 1: '12', 2: '11', 3: '11', 4: '14', 5: '13', 6: '13'}\n", "labels_comp: ['12', '12', '11', '11', '14', '13', '13']\n", "all_labels_ori: {'14', '12', '11', '13'}\n", "num_of_each_label: {'14': 1, '13': 2, '12': 2, '11': 2}\n", "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}]\n", "\n", " --- for graph 1 --- \n", "\n", "multiset: ['79', '79', '710', '978', '978', '10788', '8109', '8109']\n", "set_unique: ['710', '8109', '79', '10788', '978']\n", "set_compressed: {'710': '15', '79': '17', '8109': '16', '978': '19', '10788': '18'}\n", "all_set_compressed: {'710': '15', '79': '17', '978': '19', '10788': '18', '8109': '16', '456': '13', '544': '14', '647': '11', '76': '12'}\n", "num_of_labels_occured: 19\n", "\n", " compressed labels: {0: '17', 1: '17', 2: '15', 3: '19', 4: '19', 5: '18', 6: '16', 7: '16'}\n", "labels_comp: ['17', '17', '15', '19', '19', '18', '16', '16']\n", "all_labels_ori: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", "num_of_each_label: {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}\n", "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}, {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}]\n", "\n", " all_num_of_labels_occured: 19\n", "\n", " --- calculating kernel matrix ---\n", "\n", " labels: {'14', '12', '11', '13'}\n", "vector1: [[1 2 2 2]]\n", "vector2: [[1 2 2 2]]\n", "\n", " labels: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n", "vector1: [[0 0 2 2 0 2 1 0 0]]\n", "vector2: [[1 2 0 0 2 0 0 2 1]]\n", "\n", " labels: {'18', '17', '15', '16', '19'}\n", "vector1: [[1 2 1 2 2]]\n", "vector2: [[1 2 1 2 2]]\n", "\n", " Kmatrix: [[ 55. 36.]\n", " [ 36. 72.]]\n", "\n", " --- Weisfeiler-Lehman subtree kernel built in 0.0034377574920654297 seconds ---\n" ] }, { "data": { "text/plain": [ "array([[ 55., 36.],\n", " [ 36., 72.]])" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test of WL subtree kernel on many graphs\n", "\n", "import sys\n", "import pathlib\n", "from collections import Counter\n", "sys.path.insert(0, \"../\")\n", "\n", "import networkx as nx\n", "import numpy as np\n", "import time\n", "\n", "from pygraph.kernels.spkernel import spkernel\n", "from pygraph.kernels.pathKernel import pathkernel\n", "\n", "def weisfeilerlehmankernel(*args, height = 0, base_kernel = 'subtree'):\n", " \"\"\"Calculate Weisfeiler-Lehman kernels between graphs.\n", " \n", " Parameters\n", " ----------\n", " Gn : List of NetworkX graph\n", " List of graphs between which the kernels are calculated.\n", " /\n", " G1, G2 : NetworkX graphs\n", " 2 graphs between which the kernel is calculated.\n", " \n", " height : subtree height\n", " \n", " base_kernel : base kernel used in each iteration of WL kernel\n", " the default base kernel is subtree kernel\n", " \n", " Return\n", " ------\n", " Kmatrix/Kernel : Numpy matrix/int\n", " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. 
/ Weisfeiler-Lehman kernel between the 2 graphs.\n",
"    \n",
"    Notes\n",
"    -----\n",
"    This function now supports the WL subtree kernel and the WL shortest path kernel.\n",
"    \n",
"    References\n",
"    ----------\n",
"    [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-Lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61.\n",
"    \"\"\"\n",
"    if len(args) == 1: # for a list of graphs\n",
"#         print(args)\n",
"        start_time = time.time()\n",
"        \n",
"        # for WL subtree kernel\n",
"        if base_kernel == 'subtree':\n",
"            Kmatrix = _wl_subtreekernel_do(args[0], height = height, base_kernel = 'subtree')\n",
"        \n",
"        # for WL edge kernel\n",
"        elif base_kernel == 'edge':\n",
"            print('edge')\n",
"        \n",
"        # for WL shortest path kernel\n",
"        elif base_kernel == 'sp':\n",
"            Gn = args[0]\n",
"            Kmatrix = np.zeros((len(Gn), len(Gn)))\n",
"            for i in range(0, len(Gn)):\n",
"                for j in range(i, len(Gn)):\n",
"                    Kmatrix[i][j] = _weisfeilerlehmankernel_do(Gn[i], Gn[j])\n",
"                    Kmatrix[j][i] = Kmatrix[i][j]\n",
"\n",
"        print(\"\\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---\" % (base_kernel, len(args[0]), (time.time() - start_time)))\n",
"        \n",
"        return Kmatrix\n",
"    \n",
"    else: # for only 2 graphs\n",
"        \n",
"        start_time = time.time()\n",
"        \n",
"        # for WL subtree kernel\n",
"        if base_kernel == 'subtree':\n",
"            args = [args[0], args[1]]\n",
"#             print(args)\n",
"            kernel = _wl_subtreekernel_do(args, height = height, base_kernel = 'subtree')\n",
"        \n",
"        # for WL edge kernel\n",
"        elif base_kernel == 'edge':\n",
"            print('edge')\n",
"        \n",
"        # for WL shortest path kernel\n",
"        elif base_kernel == 'sp':\n",
"            # use the same pairwise WL-sp helper as the list branch above\n",
"            kernel = _weisfeilerlehmankernel_do(args[0], args[1])\n",
"\n",
"        print(\"\\n --- Weisfeiler-Lehman %s kernel built in %s seconds ---\" % (base_kernel, time.time() - start_time))\n",
"        \n",
"        return kernel\n",
"    \n",
"    \n",
"def _weisfeilerlehmankernel_do(G1, G2):\n",
"    \"\"\"Calculate the Weisfeiler-Lehman kernel between 2 graphs. In each iteration this kernel applies a base kernel (here the path kernel) to the relabelled graphs.\n",
"    \n",
"    Parameters\n",
"    ----------\n",
"    G1, G2 : NetworkX graphs\n",
"        2 graphs between which the kernel is calculated.\n",
"        \n",
"    Return\n",
"    ------\n",
"    Kernel : int\n",
"        Weisfeiler-Lehman kernel between the 2 graphs.\n",
"    \"\"\"\n",
"    \n",
"    # init.\n",
"    kernel = 0 # init kernel\n",
"    num_nodes1 = G1.number_of_nodes()\n",
"    num_nodes2 = G2.number_of_nodes()\n",
"    height = 12 # min(num_nodes1, num_nodes2) #Q: how to determine the upper bound of the height?\n",
"    \n",
"    # the first iteration.\n",
"    labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
"    labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
"    kernel += pathkernel(G1, G2) # change your base kernel here (and one more below)\n",
"    \n",
"    for h in range(0, height):\n",
"#         if labelset1 != labelset2:\n",
"#             break\n",
"\n",
"        # Weisfeiler-Lehman test of graph isomorphism.\n",
"        relabel(G1)\n",
"        relabel(G2)\n",
"\n",
"        # calculate kernel\n",
"        kernel += pathkernel(G1, G2) # change your base kernel here (and one more before)\n",
"\n",
"        # get label sets of both graphs\n",
"        labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
"        labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
"    \n",
"    return kernel\n",
"\n",
"\n",
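"# The two functions above follow the generic WL scheme of [1]: writing G^(0) = G and\n",
"# G^(h) for the graph after h relabelling rounds, the kernel accumulates a base kernel\n",
"# over the relabelled graphs,\n",
"#     k_WL(G1, G2) = sum_{h = 0}^{H} k_base(G1^(h), G2^(h)),\n",
"# with pathkernel as k_base in _weisfeilerlehmankernel_do and a dot product of\n",
"# label-count vectors as k_base in the subtree variant below.\n",
"#\n",
"# Minimal usage sketch (a hypothetical helper, defined but not called so the recorded\n",
"# output of this cell stays unchanged): compare two tiny labelled graphs with both\n",
"# calling conventions; with the subtree base kernel both calls return a 2x2 matrix,\n",
"# as in the recorded output of this cell.\n",
"def _wl_usage_sketch():\n",
"    g1 = nx.Graph()\n",
"    g1.add_nodes_from([(0, {'label': 'C'}), (1, {'label': 'O'})])\n",
"    g1.add_edge(0, 1)\n",
"    g2 = nx.Graph()\n",
"    g2.add_nodes_from([(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'O'})])\n",
"    g2.add_edges_from([(0, 1), (1, 2)])\n",
"    k_pair = weisfeilerlehmankernel(g1, g2, height = 1)    # pairwise call\n",
"    K_list = weisfeilerlehmankernel([g1, g2], height = 1)  # list-of-graphs call\n",
"    return k_pair, K_list\n",
"\n",
"\n",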
"def relabel(G):\n",
"    '''\n",
"    Relabel nodes in graph G in one iteration of the 1-dim. WL test of graph isomorphism.\n",
"    \n",
"    Parameters\n",
"    ----------\n",
"    G : NetworkX graph\n",
"        The graph whose nodes are relabeled.\n",
"    '''\n",
"    \n",
"    # get the set of original labels\n",
"    labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
"    print(labels_ori)\n",
"    num_of_each_label = dict(Counter(labels_ori))\n",
"    print(num_of_each_label)\n",
"    num_of_labels = len(num_of_each_label)\n",
"    print(num_of_labels)\n",
"    \n",
"    set_multisets = []\n",
"    for node in G.nodes(data = True):\n",
"        # Multiset-label determination.\n",
"        multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n",
"        # sorting each multiset\n",
"        multiset.sort()\n",
"        multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the node's own label as prefix\n",
"        set_multisets.append(multiset)\n",
"    print(set_multisets)\n",
"    \n",
"    # label compression\n",
"#     set_multisets.sort() # this is unnecessary\n",
"    set_unique = list(set(set_multisets)) # set of unique multiset labels\n",
"    print(set_unique)\n",
"    set_compressed = { value : str(set_unique.index(value) + num_of_labels + 1) for value in set_unique } # assign new labels\n",
"    print(set_compressed)\n",
"    \n",
"    # relabel nodes\n",
"#     nx.relabel_nodes(G, set_compressed, copy = False)\n",
"    for node in G.nodes(data = True):\n",
"        node[1]['label'] = set_compressed[set_multisets[node[0]]]\n",
"    print(nx.get_node_attributes(G, 'label'))\n",
"\n",
"    # get the set of compressed labels\n",
"    labels_comp = list(nx.get_node_attributes(G, 'label').values())\n",
"    print(labels_comp)\n",
"    num_of_each_label.update(dict(Counter(labels_comp)))\n",
"    print(num_of_each_label)\n",
"\n",
"\n",
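"# Worked example of one relabelling round, read off the recorded output above\n",
"# (graph 0, height = 1): a node labelled 'C' whose two neighbours are both labelled\n",
"# 'O' gets the multiset-label 'COO', and the unique multiset-labels\n",
"# {'OCC', 'COO', 'CCO', 'CC'} are compressed to the new labels '4'..'7'.\n",
"#\n",
"# Tiny demo (a hypothetical helper, not called here): relabel a labelled path C-C-O\n",
"# once; the multiset-labels 'CC', 'CCO', 'OC' should be compressed to '3'..'5'\n",
"# (their order depends on set iteration order).\n",
"def _relabel_toy_demo():\n",
"    G = nx.Graph()\n",
"    G.add_nodes_from([(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'O'})])\n",
"    G.add_edges_from([(0, 1), (1, 2)])\n",
"    relabel(G)\n",
"    return nx.get_node_attributes(G, 'label')\n",
"\n",
"\n",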
"def _wl_subtreekernel_do(*args, height = 0, base_kernel = 'subtree'):\n",
"    \"\"\"Calculate Weisfeiler-Lehman subtree kernels between graphs.\n",
"    \n",
"    Parameters\n",
"    ----------\n",
"    Gn : List of NetworkX graph\n",
"        List of graphs between which the kernels are calculated.\n",
"        \n",
"    Return\n",
"    ------\n",
"    Kmatrix : Numpy matrix\n",
"        Kernel matrix, each element of which is the Weisfeiler-Lehman subtree kernel between 2 graphs.\n",
"    \"\"\"\n",
"    \n",
"#     print(args)\n",
"    Gn = args[0]\n",
"#     print(Gn)\n",
"\n",
"    Kmatrix = np.zeros((len(Gn), len(Gn)))\n",
"    all_num_of_labels_occured = 0 # total number of distinct labels that have occurred as node labels in any graph so far\n",
"    \n",
"    # initial for height = 0\n",
"    print('\\n --- height = 0 --- ')\n",
"    all_labels_ori = set() # all unique original labels in all graphs in this iteration\n",
"    all_num_of_each_label = [] # number of occurrences of each label in each graph in this iteration\n",
"    all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n",
"    num_of_labels_occured = all_num_of_labels_occured # number of distinct labels that have occurred so far\n",
"\n",
"    # for each graph\n",
"    for idx, G in enumerate(Gn):\n",
"        # get the set of original labels\n",
"        print('\\n --- for graph %d --- \\n' % (idx))\n",
"        labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
"        print('labels_ori: %s' % (labels_ori))\n",
"        all_labels_ori.update(labels_ori)\n",
"        print('all_labels_ori: %s' % (all_labels_ori))\n",
"        num_of_each_label = dict(Counter(labels_ori)) # number of occurrences of each label in this graph\n",
"        print('num_of_each_label: %s' % (num_of_each_label))\n",
"        all_num_of_each_label.append(num_of_each_label)\n",
"        print('all_num_of_each_label: %s' % (all_num_of_each_label))\n",
"        num_of_labels = len(num_of_each_label) # number of all unique labels\n",
"        print('num_of_labels: %s' % (num_of_labels))\n",
"        \n",
"        all_labels_ori.update(labels_ori)\n",
"        print('all_labels_ori: %s' % (all_labels_ori))\n",
"    \n",
"    all_num_of_labels_occured += len(all_labels_ori)\n",
"    print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n",
"    \n",
"    # calculate subtree kernel with the 0th iteration and add it to the final kernel\n",
"    print('\\n --- calculating kernel matrix ---')\n",
"    for i in range(0, len(Gn)):\n",
"        for j in range(i, len(Gn)):\n",
"            labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n",
"            print('\\n labels: %s' % (labels))\n",
"            vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n",
"            vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n",
"            print('vector1: %s' % (vector1))\n",
"            print('vector2: %s' % (vector2))\n",
"            Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n",
"            Kmatrix[j][i] = Kmatrix[i][j]\n",
"    print('Kmatrix: %s' % (Kmatrix))\n",
"\n",
"    # iterate each height\n",
"    for h in range(1, height + 1):\n",
"        print('\\n --- height = %d --- ' % (h))\n",
"        all_set_compressed = {} # a dictionary mapping multiset labels to new ones in all graphs in this iteration\n",
"        num_of_labels_occured = all_num_of_labels_occured # number of distinct labels that have occurred so far\n",
"        all_labels_ori = set()\n",
"        all_num_of_each_label = []\n",
"        \n",
"        # for each graph\n",
"        for idx, G in enumerate(Gn):\n",
"#             # get the set of original labels\n",
"            print('\\n --- for graph %d --- \\n' % (idx))\n",
"#             labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
"#             print('labels_ori: %s' % (labels_ori))\n",
"#             num_of_each_label = dict(Counter(labels_ori)) # number of occurrences of each label in this graph\n",
"#             print('num_of_each_label: %s' % (num_of_each_label))\n",
"#             num_of_labels = len(num_of_each_label) # number of all unique labels\n",
"#             print('num_of_labels: %s' % (num_of_labels))\n",
"            \n",
"#             all_labels_ori.update(labels_ori)\n",
"#             print('all_labels_ori: %s' % (all_labels_ori))\n",
"# #             num_of_labels_occured += num_of_labels #@todo not precise\n",
"#             num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n",
"#             print('num_of_labels_occured: %s' % (num_of_labels_occured))\n",
"            \n",
"            set_multisets = []\n",
"            for node in G.nodes(data = True):\n",
"                # Multiset-label determination.\n",
"                multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n",
"                # sorting each multiset\n",
"                multiset.sort()\n",
"                multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the node's own label as prefix\n",
"                set_multisets.append(multiset)\n",
"            print('multiset: %s' % (set_multisets))\n",
"\n",
"            # label compression\n",
"            # set_multisets.sort() # this is unnecessary\n",
"            set_unique = list(set(set_multisets)) # set of unique multiset labels\n",
"            print('set_unique: %s' % (set_unique))\n",
"            # a dictionary mapping original labels to new ones\n",
"            set_compressed = {}\n",
"            # if a label occurred before, assign its former compressed label; else assign (number of labels occurred so far + 1) as the compressed label\n",
"            for value in set_unique:\n",
"                if value in all_set_compressed.keys():\n",
"                    set_compressed.update({ value : all_set_compressed[value] })\n",
"                else:\n",
"                    set_compressed.update({ value : str(num_of_labels_occured + 1) })\n",
"                    num_of_labels_occured += 1\n",
"#             set_compressed = { value : (all_set_compressed[value] if value in all_set_compressed.keys() else str(set_unique.index(value) + num_of_labels_occured + 1)) for value in set_unique }\n",
"            print('set_compressed: %s' % (set_compressed))\n",
"            \n",
"            all_set_compressed.update(set_compressed)\n",
"            print('all_set_compressed: %s' % (all_set_compressed))\n",
"#             num_of_labels_occured += len(set_compressed) #@todo not precise\n",
"            print('num_of_labels_occured: %s' % (num_of_labels_occured))\n",
"            \n",
"            # relabel nodes\n",
"            # nx.relabel_nodes(G, set_compressed, copy = False)\n",
"            for node in G.nodes(data = True):\n",
"                node[1]['label'] = set_compressed[set_multisets[node[0]]]\n",
"            print('\\n compressed labels: %s' % (nx.get_node_attributes(G, 'label')))\n",
"\n",
"            # get the set of compressed labels\n",
"            labels_comp = list(nx.get_node_attributes(G, 'label').values())\n",
"            print('labels_comp: %s' % (labels_comp))\n",
"            all_labels_ori.update(labels_comp)\n",
"            print('all_labels_ori: %s' % (all_labels_ori))\n",
"            num_of_each_label = dict(Counter(labels_comp))\n",
"            print('num_of_each_label: %s' % (num_of_each_label))\n",
"            all_num_of_each_label.append(num_of_each_label)\n",
"            print('all_num_of_each_label: %s' % (all_num_of_each_label))\n",
"        \n",
"        all_num_of_labels_occured += len(all_labels_ori)\n",
"        print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n",
"        \n",
"        # calculate subtree kernel with h iterations and add it to the final kernel\n",
"        print('\\n --- calculating kernel matrix ---')\n",
"        for i in range(0, len(Gn)):\n",
"            for j in range(i, len(Gn)):\n",
"                labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n",
"                print('\\n labels: %s' % (labels))\n",
"                vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n",
"                vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n",
"                print('vector1: %s' % (vector1))\n",
"                print('vector2: %s' % (vector2))\n",
"                Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n",
"                Kmatrix[j][i] = Kmatrix[i][j]\n",
"        \n",
"        print('\\n Kmatrix: %s' % (Kmatrix))\n",
"\n",
"    return Kmatrix\n",
"\n",
"\n",
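"# Sanity check against the recorded output of this cell: each height contributes the\n",
"# dot product of the two graphs' label-count vectors, so for the pair tested below\n",
"#     Kmatrix[0][0] = 29 + 13 + 13 = 55,   Kmatrix[0][1] = 30 + 6 + 0 = 36,\n",
"#     Kmatrix[1][1] = 40 + 18 + 14 = 72,\n",
"# where e.g. 30 = 5 * 6 comes from the shared label 'C' at height 0 (5 C nodes in\n",
"# graph 0 vs 6 C nodes in graph 1, read off the height-1 multisets above).\n",
"#\n",
"# The same per-iteration contribution as a plain Counter dot product -- a hypothetical\n",
"# helper, not used by the code above:\n",
"def _label_count_dot(counts1, counts2):\n",
"    # sum over the labels the two graphs share, weighted by their counts\n",
"    return sum(c * counts2[lbl] for lbl, c in counts1.items() if lbl in counts2)\n",
"\n",
"# e.g. _label_count_dot(Counter('CCCCCOO'), Counter('CCCCCCSS')) == 30\n",
"\n",
"\n",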
"# main\n",
"import sys\n",
"from collections import Counter\n",
"import networkx as nx\n",
"sys.path.insert(0, \"../\")\n",
"from pygraph.utils.graphfiles import loadDataset\n",
"from pygraph.kernels.spkernel import spkernel\n",
"\n",
"dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
"G1 = dataset[15]\n",
"print(nx.get_node_attributes(G1, 'label'))\n",
"G2 = dataset[80]\n",
"print(nx.get_node_attributes(G2, 'label'))\n",
"\n",
"weisfeilerlehmankernel(G1, G2, height = 2)\n",
"# Kmatrix = weisfeilerlehmankernel(G1, G2)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }