Browse Source

A lot of things changed...

v0.1
jajupmochi 6 years ago
parent
commit
347b44cdd9
100 changed files with 851127 additions and 115 deletions
  1. +14
    -6
      README.md
  2. +40
    -0
      notebooks/check_gm.py
  3. BIN
      notebooks/check_gm.zip
  4. +28562
    -0
      notebooks/check_gm/Acyclic.gm.eps
  5. +15
    -0
      notebooks/check_gm/Acyclic_results.txt
  6. +41291
    -0
      notebooks/check_gm/Alkane.gm.eps
  7. +14
    -0
      notebooks/check_gm/Alkane_results.txt
  8. +65
    -0
      notebooks/check_gm/ENZYMES_results.txt
  9. +41476
    -0
      notebooks/check_gm/Letter-med.gm.eps
  10. +18
    -0
      notebooks/check_gm/Letter-med_results.txt
  11. +41401
    -0
      notebooks/check_gm/MAO.gm.eps
  12. +22
    -0
      notebooks/check_gm/MAO_results.txt
  13. +29420
    -0
      notebooks/check_gm/MUTAG.gm.eps
  14. +15
    -0
      notebooks/check_gm/MUTAG_results.txt
  15. +13
    -0
      notebooks/check_gm/Mutagenicity_results.txt
  16. +30691
    -0
      notebooks/check_gm/PAH.gm.eps
  17. +11
    -0
      notebooks/check_gm/PAH_results.txt
  18. +28803
    -0
      notebooks/check_gm/spkernel/Acyclic.gm.eps
  19. +39027
    -0
      notebooks/check_gm/spkernel/Acyclic.gm.jstsp.eps
  20. BIN
      notebooks/check_gm/spkernel/Acyclic.gm.jstsp.npz
  21. BIN
      notebooks/check_gm/spkernel/Acyclic.gm.npz
  22. +32
    -0
      notebooks/check_gm/spkernel/Acyclic_results.txt
  23. +39162
    -0
      notebooks/check_gm/spkernel/Alkane.gm.eps
  24. +39162
    -0
      notebooks/check_gm/spkernel/Alkane.gm.jstsp.eps
  25. BIN
      notebooks/check_gm/spkernel/Alkane.gm.jstsp.npz
  26. BIN
      notebooks/check_gm/spkernel/Alkane.gm.npz
  27. +32
    -0
      notebooks/check_gm/spkernel/Alkane_results.txt
  28. +41320
    -0
      notebooks/check_gm/spkernel/ENZYMES.gm.eps
  29. +41293
    -0
      notebooks/check_gm/spkernel/ENZYMES.gm.jstsp.eps
  30. BIN
      notebooks/check_gm/spkernel/ENZYMES.gm.jstsp.npz
  31. BIN
      notebooks/check_gm/spkernel/ENZYMES.gm.npz
  32. +65
    -0
      notebooks/check_gm/spkernel/ENZYMES_results.txt
  33. +41279
    -0
      notebooks/check_gm/spkernel/Letter-med.gm.eps
  34. +39121
    -0
      notebooks/check_gm/spkernel/Letter-med.gm.jstsp.eps
  35. BIN
      notebooks/check_gm/spkernel/Letter-med.gm.jstsp.npz
  36. BIN
      notebooks/check_gm/spkernel/Letter-med.gm.npz
  37. +18
    -0
      notebooks/check_gm/spkernel/Letter-med_results.txt
  38. +39142
    -0
      notebooks/check_gm/spkernel/MAO.gm.eps
  39. +39142
    -0
      notebooks/check_gm/spkernel/MAO.gm.jstsp.eps
  40. BIN
      notebooks/check_gm/spkernel/MAO.gm.jstsp.npz
  41. BIN
      notebooks/check_gm/spkernel/MAO.gm.npz
  42. +22
    -0
      notebooks/check_gm/spkernel/MAO_results.txt
  43. +39027
    -0
      notebooks/check_gm/spkernel/MUTAG.gm.eps
  44. +39027
    -0
      notebooks/check_gm/spkernel/MUTAG.gm.jstsp.eps
  45. BIN
      notebooks/check_gm/spkernel/MUTAG.gm.jstsp.npz
  46. BIN
      notebooks/check_gm/spkernel/MUTAG.gm.npz
  47. +32
    -0
      notebooks/check_gm/spkernel/MUTAG_results.txt
  48. +41331
    -0
      notebooks/check_gm/spkernel/Mutagenicity.gm.eps
  49. +13
    -0
      notebooks/check_gm/spkernel/Mutagenicity_results.txt
  50. +41179
    -0
      notebooks/check_gm/spkernel/PAH.gm.eps
  51. +41179
    -0
      notebooks/check_gm/spkernel/PAH.gm.jstsp.eps
  52. BIN
      notebooks/check_gm/spkernel/PAH.gm.jstsp.npz
  53. BIN
      notebooks/check_gm/spkernel/PAH.gm.npz
  54. +24
    -0
      notebooks/check_gm/spkernel/PAH_results.txt
  55. +2043
    -0
      notebooks/check_gm/spkernel/compare_eigen_values.eps
  56. +3107
    -0
      notebooks/check_gm/spkernel/compare_running_time.eps
  57. +134
    -0
      notebooks/check_gm_gstsp.py
  58. +77
    -0
      notebooks/draw_datasize_vs_chunksize.py
  59. +64
    -0
      notebooks/draw_gm.py
  60. +69
    -0
      notebooks/draw_running_time.py
  61. +2
    -2
      notebooks/run_commonwalkkernel.ipynb
  62. +85
    -0
      notebooks/run_commonwalkkernel.py
  63. +2
    -2
      notebooks/run_marginalizedkernel.ipynb
  64. +81
    -0
      notebooks/run_marginalizedkernel.py
  65. +2
    -2
      notebooks/run_pathkernel.ipynb
  66. +14
    -100
      notebooks/run_spkernel.py
  67. +86
    -0
      notebooks/run_structuralspkernel.py
  68. +2
    -2
      notebooks/run_untilhpathkernel.ipynb
  69. +84
    -0
      notebooks/run_untilhpathkernel.py
  70. +26
    -1
      notebooks/test_networkx.ipynb
  71. +690
    -0
      notebooks/test_parallel.py
  72. +2293
    -0
      notebooks/test_parallel/0.eps
  73. +2451
    -0
      notebooks/test_parallel/1.eps
  74. +2642
    -0
      notebooks/test_parallel/2.eps
  75. +2832
    -0
      notebooks/test_parallel/3.eps
  76. +3242
    -0
      notebooks/test_parallel/4.eps
  77. +3880
    -0
      notebooks/test_parallel/5.eps
  78. BIN
      notebooks/test_parallel/Acyclic.npy
  79. BIN
      notebooks/test_parallel/Alkane.npy
  80. +15
    -0
      notebooks/test_parallel/ENZYMES.txt
  81. BIN
      notebooks/test_parallel/Letter-med.npy
  82. BIN
      notebooks/test_parallel/MAO.npy
  83. BIN
      notebooks/test_parallel/MUTAG.npy
  84. BIN
      notebooks/test_parallel/PAH.npy
  85. +14
    -0
      notebooks/test_parallel/commonwalkkernel.Acyclic.txt
  86. +14
    -0
      notebooks/test_parallel/commonwalkkernel.Letter-med.txt
  87. BIN
      notebooks/test_parallel/commonwalkkernel.MAO.npy
  88. +14
    -0
      notebooks/test_parallel/commonwalkkernel.MUTAG.txt
  89. BIN
      notebooks/test_parallel/commonwalkkernel.PAH.npy
  90. +2436
    -0
      notebooks/test_parallel/commonwalkkernel0.eps
  91. +2634
    -0
      notebooks/test_parallel/commonwalkkernel1.eps
  92. +2313
    -0
      notebooks/test_parallel/myria/0.eps
  93. +2471
    -0
      notebooks/test_parallel/myria/1.eps
  94. +2662
    -0
      notebooks/test_parallel/myria/2.eps
  95. +2852
    -0
      notebooks/test_parallel/myria/3.eps
  96. +3282
    -0
      notebooks/test_parallel/myria/4.eps
  97. +3920
    -0
      notebooks/test_parallel/myria/5.eps
  98. +2092
    -0
      notebooks/test_parallel/myria/6.eps
  99. BIN
      notebooks/test_parallel/myria/Acyclic.npy
  100. BIN
      notebooks/test_parallel/myria/Alkane.npy

+ 14
- 6
README.md View File

@@ -3,16 +3,24 @@ A python package for graph kernels.

## Requirements

numpy==1.15.1
python==3.6.5
numpy==1.15.2
scipy==1.1.0
matplotlib==2.2.2
networkx==2.1
scikit-learn==0.19.1
matplotlib==3.0.0
networkx==2.2
scikit-learn==0.20.0
tabulate==0.8.2
tqdm==4.23.4
control==0.7.0 (for generalized random walk kernels only)
tqdm==4.26.0
suffix-tree==0.0.6 (for the path kernel up to length h only)
control==0.8.0 (for generalized random walk kernels only)
slycot===0.3.2.dev-5263ada (for generalized random walk kernels only, requires fortran compiler, gfortran for example)

The following modules are recommended to be upgraded:
ipython==7.0.1
ipykernel==5.1.0
ipython-genutils==0.2.0
ipywidgets==7.4.2

## Results with minimal test RMSE for each kernel on dataset Acyclic

All kernels except for the Cyclic pattern kernel are tested on dataset Acyclic, which consists of 185 molecules (graphs). (Cyclic pattern kernel is tested on dataset MAO and PAH.)


+ 40
- 0
notebooks/check_gm.py View File

@@ -0,0 +1,40 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test gram matrices.
Created on Wed Sep 19 15:32:29 2018

@author: ljia
"""

import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import eig

# read gram matrices from file.
results_dir = 'results/marginalizedkernel/myria'
ds_name = 'MUTAG'
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
#print('gm time: ', gmfile['gmtime'])
# a list to store gram matrices for all param_grid_precomputed
gram_matrices = gmfile['gms']
# param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones
#y = gmfile['y'].tolist()
#x = gram_matrices[0]

for x in gram_matrices:
    # Start a fresh figure for each matrix; otherwise imshow/colorbar
    # accumulate on the same axes across iterations and the saved plot
    # grows an extra colorbar every pass.
    plt.figure()
    plt.imshow(x)
    plt.colorbar()
    plt.savefig('check_gm/' + ds_name + '.gm.eps', format='eps', dpi=300)
    plt.close()
    # Sanity checks: a valid gram matrix must be symmetric with a
    # well-behaved diagonal and (numerically) non-negative eigenvalues.
    # print(np.transpose(x))
    print('if symmetric: ', np.array_equal(x, np.transpose(x)))
    print('diag: ', np.diag(x))
    print('sum diag < 0.1: ', np.sum(np.diag(x) < 0.1))
    print('min, max diag: ', min(np.diag(x)), max(np.diag(x)))
    print('mean x: ', np.mean(np.mean(x)))
    # eig returns (eigenvalues, eigenvectors); an eigenvalue below the
    # -1e-10 tolerance means the matrix is not positive semi-definite.
    [lamda, v] = eig(x)
    print('min, max lambda: ', min(lamda), max(lamda))
    if -1e-10 > min(lamda):
        raise Exception('wrong eigen values.')

BIN
notebooks/check_gm.zip View File


+ 28562
- 0
notebooks/check_gm/Acyclic.gm.eps
File diff suppressed because it is too large
View File


+ 15
- 0
notebooks/check_gm/Acyclic_results.txt View File

@@ -0,0 +1,15 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.627451206897581
min, max lambda: (-3.1105220613135224e-15+0j) (118.43667381920945+0j)

+ 41291
- 0
notebooks/check_gm/Alkane.gm.eps
File diff suppressed because it is too large
View File


+ 14
- 0
notebooks/check_gm/Alkane_results.txt View File

@@ -0,0 +1,14 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9275876964656552
min, max lambda: (-6.000888652084434e-15+0j) (140.45172231003968+0j)

+ 65
- 0
notebooks/check_gm/ENZYMES_results.txt View File

@@ -0,0 +1,65 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.47604228590012404
min, max lambda: 0.0001002381061317695 299.17895175532897

jstsp:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.8766424893590421
min, max lambda: (-4.04460628433013e-14+0j) (529.3691973508182+0j)

+ 41476
- 0
notebooks/check_gm/Letter-med.gm.eps
File diff suppressed because it is too large
View File


+ 18
- 0
notebooks/check_gm/Letter-med_results.txt View File

@@ -0,0 +1,18 @@
mine:

if symmetric: True
diag: [1. 1. 1. ... 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.40955939575371497
min, max lambda: 2.196833029054622e-10 943.9175660197347


jstsp:

if symmetric: False
diag: [1. 1. 1. ... 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: nan
LinAlgError: Array must not contain infs or NaNs

+ 41401
- 0
notebooks/check_gm/MAO.gm.eps
File diff suppressed because it is too large
View File


+ 22
- 0
notebooks/check_gm/MAO_results.txt View File

@@ -0,0 +1,22 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9449639567258331
min, max lambda: (-1.1563146193980238e-15+0j) (64.31844814063015+0j)


jstsp:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9632407703418899
min, max lambda: (-7.502900269338164e-16-3.0606423294452126e-17j) (65.53092059526354+0j)

+ 29420
- 0
notebooks/check_gm/MUTAG.gm.eps
File diff suppressed because it is too large
View File


+ 15
- 0
notebooks/check_gm/MUTAG_results.txt View File

@@ -0,0 +1,15 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.8881281122679946
min, max lambda: (-1.136106712787655e-14+0j) (177.00000000000003+0j)

+ 13
- 0
notebooks/check_gm/Mutagenicity_results.txt View File

@@ -0,0 +1,13 @@
mine:

if symmetric: True
diag: [1. 1. 1. ... 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.633201506393603
min, max lambda: (-3.568957793283026e-14-1.2061062712627808e-14j) (2848.6624188252435+0j)


jstsp:



+ 30691
- 0
notebooks/check_gm/PAH.gm.eps
File diff suppressed because it is too large
View File


+ 11
- 0
notebooks/check_gm/PAH_results.txt View File

@@ -0,0 +1,11 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9586479796584891
min, max lambda: (-1.3363208262303617e-15+0j) (90.2113226507757+0j)

+ 28803
- 0
notebooks/check_gm/spkernel/Acyclic.gm.eps
File diff suppressed because it is too large
View File


+ 39027
- 0
notebooks/check_gm/spkernel/Acyclic.gm.jstsp.eps
File diff suppressed because it is too large
View File


BIN
notebooks/check_gm/spkernel/Acyclic.gm.jstsp.npz View File


BIN
notebooks/check_gm/spkernel/Acyclic.gm.npz View File


+ 32
- 0
notebooks/check_gm/spkernel/Acyclic_results.txt View File

@@ -0,0 +1,32 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.7656098059302923
min, max lambda (-3.425001366427846e-15-2.236521917797024e-16j) (142.86649135778595+0j)


jstsp:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9412011742906522
min, max lambda (-9.433792343294819e-15-2.5978926329723254e-15j) (172.4203026547106+0j)

+ 39162
- 0
notebooks/check_gm/spkernel/Alkane.gm.eps
File diff suppressed because it is too large
View File


+ 39162
- 0
notebooks/check_gm/spkernel/Alkane.gm.jstsp.eps
File diff suppressed because it is too large
View File


BIN
notebooks/check_gm/spkernel/Alkane.gm.jstsp.npz View File


BIN
notebooks/check_gm/spkernel/Alkane.gm.npz View File


+ 32
- 0
notebooks/check_gm/spkernel/Alkane_results.txt View File

@@ -0,0 +1,32 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9372426749560228
min, max lambda (-5.513191435356332e-15+0j) (140.08307372708344+0j)


jstsp:

if symmetric: False
diag: [nan 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: nan nan
mean x: nan
LinAlgError: Array must not contain infs or NaNs

+ 41320
- 0
notebooks/check_gm/spkernel/ENZYMES.gm.eps
File diff suppressed because it is too large
View File


+ 41293
- 0
notebooks/check_gm/spkernel/ENZYMES.gm.jstsp.eps
File diff suppressed because it is too large
View File


BIN
notebooks/check_gm/spkernel/ENZYMES.gm.jstsp.npz View File


BIN
notebooks/check_gm/spkernel/ENZYMES.gm.npz View File


+ 65
- 0
notebooks/check_gm/spkernel/ENZYMES_results.txt View File

@@ -0,0 +1,65 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.47604228590012404
min, max lambda: 0.0001002381061317695 299.17895175532897

jstsp:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.8766424893590421
min, max lambda: (-4.04460628433013e-14+0j) (529.3691973508182+0j)

+ 41279
- 0
notebooks/check_gm/spkernel/Letter-med.gm.eps
File diff suppressed because it is too large
View File


+ 39121
- 0
notebooks/check_gm/spkernel/Letter-med.gm.jstsp.eps
File diff suppressed because it is too large
View File


BIN
notebooks/check_gm/spkernel/Letter-med.gm.jstsp.npz View File


BIN
notebooks/check_gm/spkernel/Letter-med.gm.npz View File


+ 18
- 0
notebooks/check_gm/spkernel/Letter-med_results.txt View File

@@ -0,0 +1,18 @@
mine:

if symmetric: True
diag: [1. 1. 1. ... 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.40955939575371497
min, max lambda: 2.196833029054622e-10 943.9175660197347


jstsp:

if symmetric: False
diag: [1. 1. 1. ... 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: nan
LinAlgError: Array must not contain infs or NaNs

+ 39142
- 0
notebooks/check_gm/spkernel/MAO.gm.eps
File diff suppressed because it is too large
View File


+ 39142
- 0
notebooks/check_gm/spkernel/MAO.gm.jstsp.eps
File diff suppressed because it is too large
View File


BIN
notebooks/check_gm/spkernel/MAO.gm.jstsp.npz View File


BIN
notebooks/check_gm/spkernel/MAO.gm.npz View File


+ 22
- 0
notebooks/check_gm/spkernel/MAO_results.txt View File

@@ -0,0 +1,22 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9449639567258331
min, max lambda: (-1.1563146193980238e-15+0j) (64.31844814063015+0j)


jstsp:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9632407703418899
min, max lambda: (-7.502900269338164e-16-3.0606423294452126e-17j) (65.53092059526354+0j)

+ 39027
- 0
notebooks/check_gm/spkernel/MUTAG.gm.eps
File diff suppressed because it is too large
View File


+ 39027
- 0
notebooks/check_gm/spkernel/MUTAG.gm.jstsp.eps
File diff suppressed because it is too large
View File


BIN
notebooks/check_gm/spkernel/MUTAG.gm.jstsp.npz View File


BIN
notebooks/check_gm/spkernel/MUTAG.gm.npz View File


+ 32
- 0
notebooks/check_gm/spkernel/MUTAG_results.txt View File

@@ -0,0 +1,32 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.8501340720551299
min, max lambda: (-5.811474553224136e-15+0j) (160.72585558445357+0j)


jstsp:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9591293919401286
min, max lambda: (-8.73626400337456e-15+0j) (180.374192331094+0j)

+ 41331
- 0
notebooks/check_gm/spkernel/Mutagenicity.gm.eps
File diff suppressed because it is too large
View File


+ 13
- 0
notebooks/check_gm/spkernel/Mutagenicity_results.txt View File

@@ -0,0 +1,13 @@
mine:

if symmetric: True
diag: [1. 1. 1. ... 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.633201506393603
min, max lambda: (-3.568957793283026e-14-1.2061062712627808e-14j) (2848.6624188252435+0j)


jstsp:



+ 41179
- 0
notebooks/check_gm/spkernel/PAH.gm.eps
File diff suppressed because it is too large
View File


+ 41179
- 0
notebooks/check_gm/spkernel/PAH.gm.jstsp.eps
File diff suppressed because it is too large
View File


BIN
notebooks/check_gm/spkernel/PAH.gm.jstsp.npz View File


BIN
notebooks/check_gm/spkernel/PAH.gm.npz View File


+ 24
- 0
notebooks/check_gm/spkernel/PAH_results.txt View File

@@ -0,0 +1,24 @@
mine:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9825763767199961
min, max lambda: (-1.3999833987183273e-15+0j) (92.38382991977493+0j)


jstsp:

if symmetric: True
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum diag < 0.1: 0
min, max diag: 1.0 1.0
mean x: 0.9825763767199961
min, max lambda: (-1.3999833987183273e-15+0j) (92.38382991977493+0j)

+ 2043
- 0
notebooks/check_gm/spkernel/compare_eigen_values.eps
File diff suppressed because it is too large
View File


+ 3107
- 0
notebooks/check_gm/spkernel/compare_running_time.eps
File diff suppressed because it is too large
View File


+ 134
- 0
notebooks/check_gm_gstsp.py View File

@@ -0,0 +1,134 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
compute gm with load_data.py and test them.
Created on Wed Sep 19 16:12:13 2018

@author: ljia
"""

"""Shortest-Path graph kernel.
Python implementation based on: "Shortest-path kernels on graphs", by
Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE
International Conference on , vol., no., pp.8 pp.-, 27-30 Nov. 2005
doi: 10.1109/ICDM.2005.132
Author : Sandro Vega-Pons, Emanuele Olivetti
"""

import sys
sys.path.insert(0, "../")
import numpy as np
import networkx as nx
from pygraph.utils.graphfiles import loadDataset
import matplotlib.pyplot as plt
from numpy.linalg import eig


class GK_SP:
    """
    Shortest-path graph kernel.

    Compares two graphs through the histograms of their pairwise
    shortest-path lengths (Borgwardt & Kriegel, "Shortest-path kernels
    on graphs", ICDM 2005).
    """

    def _histogram(self, fwm):
        """Count shortest-path lengths in a distance matrix.

        Parameters
        ----------
        fwm : array-like
            Square matrix of pairwise shortest-path distances, possibly
            containing ``inf`` (disconnected pairs) or ``nan``.

        Returns
        -------
        numpy.ndarray
            ``bc`` where ``bc[d]`` is the number of entries of the strict
            upper triangle whose (integer-truncated) distance equals ``d``.
        """
        fwm = np.asarray(fwm, dtype=float)
        # Map disconnected pairs (inf) to 0 so they land in the
        # distance-0 bin, which the kernel ignores.
        fwm = np.where(np.isinf(fwm), 0, fwm)
        # BUG FIX: the original tested ``fwm == np.nan``, which is always
        # False (nan != nan by IEEE-754), so NaNs survived into
        # ``astype(int)`` with undefined results. ``np.isnan`` detects
        # them correctly.
        fwm = np.where(np.isnan(fwm), 0, fwm)
        # Strict upper triangle: count each unordered node pair once.
        fwm = np.triu(fwm, k=1)
        return np.bincount(fwm.reshape(-1).astype(int))

    def compare(self, g_1, g_2, verbose=False):
        """Compute the kernel value (similarity) between two graphs.

        Parameters
        ----------
        g_1 : networkx.Graph
            First graph.
        g_2 : networkx.Graph
            Second graph.

        Returns
        -------
        k : The similarity value between g_1 and g_2: the dot product of
            their shortest-path length histograms (distance 0 excluded).
        """
        # Histogram of Floyd-Warshall shortest-path lengths per graph.
        bc1 = self._histogram(np.array(nx.floyd_warshall_numpy(g_1)))
        bc2 = self._histogram(np.array(nx.floyd_warshall_numpy(g_2)))

        # Copy the non-zero-distance counts into vectors of a common
        # length so they can be multiplied element-wise.
        v1 = np.zeros(max(len(bc1), len(bc2)) - 1)
        v1[range(0, len(bc1) - 1)] = bc1[1:]

        v2 = np.zeros(max(len(bc1), len(bc2)) - 1)
        v2[range(0, len(bc2) - 1)] = bc2[1:]

        return np.sum(v1 * v2)

    def compare_normalized(self, g_1, g_2, verbose=False):
        """Compute the normalized kernel value between two graphs.

        A normalized version of the kernel is given by the equation:
        k_norm(g1, g2) = k(g1, g2) / sqrt(k(g1,g1) * k(g2,g2))

        NOTE(review): a graph with no path of length >= 1 (e.g. a
        single-node graph) has self-kernel 0, which makes this divide by
        zero — behaviour unchanged from the original.

        Parameters
        ----------
        g_1 : networkx.Graph
            First graph.
        g_2 : networkx.Graph
            Second graph.

        Returns
        -------
        k : The normalized similarity value between g_1 and g_2.
        """
        return self.compare(g_1, g_2) / (np.sqrt(self.compare(g_1, g_1) *
                                                 self.compare(g_2, g_2)))

    def compare_list(self, graph_list, verbose=False):
        """Compute the all-pairs kernel values for a list of graphs.

        This is faster than computing all individual pairwise values
        separately, and the result is normalized so that every diagonal
        entry equals 1.

        Parameters
        ----------
        graph_list: list
            A list of graphs (list of networkx graphs)

        Return
        ------
        K: numpy.array, shape = (len(graph_list), len(graph_list))
            The normalized similarity matrix of all graphs in graph_list.
        """
        n = len(graph_list)
        k = np.zeros((n, n))
        # The raw kernel matrix is symmetric, so only the upper triangle
        # is computed and then mirrored.
        for i in range(n):
            for j in range(i, n):
                k[i, j] = self.compare(graph_list[i], graph_list[j])
                k[j, i] = k[i, j]

        # Cosine-style normalization by the diagonal self-kernels.
        k_norm = np.zeros(k.shape)
        for i in range(k.shape[0]):
            for j in range(k.shape[1]):
                k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])

        return k_norm


# Compute the jstsp-style shortest-path Gram matrix for one dataset,
# save it, plot it, and report basic sanity statistics.
ds_name = 'PAH'
datafile = '../datasets/PAH/dataset.ds'
dataset, y = loadDataset(datafile, filename_y=None, extra_params=None)

kernel = GK_SP()
gram = kernel.compare_list(dataset)
np.savez('check_gm/' + ds_name + '.gm.jstsp', gms=gram)

plt.imshow(gram)
plt.colorbar()
plt.savefig('check_gm/' + ds_name + '.gm.jstsp.eps', format='eps', dpi=300)
# print(np.transpose(gram))
print('if symmetric: ', np.array_equal(gram, np.transpose(gram)))

# Diagonal should be all ones after normalization.
diag = np.diag(gram)
print('diag: ', diag)
print('sum diag < 0.1: ', np.sum(diag < 0.1))
print('min, max diag: ', min(diag), max(diag))
print('mean x: ', np.mean(np.mean(gram)))

# Eigenvalue range — a valid kernel matrix should be (numerically) PSD.
eigvals, eigvecs = eig(gram)
print('min, max lambda: ', min(eigvals), max(eigvals))

+ 77
- 0
notebooks/draw_datasize_vs_chunksize.py View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Draw data size v.s. chunksize.
Created on Mon Oct 8 12:00:45 2018

@author: ljia
"""
import numpy as np
import matplotlib.pyplot as plt

def loadmin(file):
    """Load a saved numpy array and locate its minimum.

    Parameters
    ----------
    file : str
        Path to a ``.npy`` file.

    Returns
    -------
    tuple
        ``(argmin, min, array)``: index of the smallest entry, the
        smallest entry itself, and the full loaded array.
    """
    data = np.load(file)
    return data.argmin(), data.min(), data

def idx2chunksize(idx):
    """Translate a result-array index back into the chunksize it
    corresponds to.

    The measurement grid is piecewise linear: indices 0-8 map to
    10, 20, ..., 90; 9-17 to 100, 200, ..., 900; 18-26 to
    1000, ..., 9000; larger indices to 10000, 20000, ...
    """
    # (upper bound, index shift, step) for the first three tiers.
    tiers = ((9, 1, 10), (18, -8, 100), (27, -17, 1000))
    for bound, shift, step in tiers:
        if idx < bound:
            return (idx + shift) * step
    return (idx - 26) * 10000
def idx2chunksize2(idx):
    """Translate a result-array index into the chunksize on the coarser
    measurement grid.

    Indices 0-4 map to 10, 30, ..., 90; 5-9 to 100, 300, ..., 900;
    10-14 to 1000, 3000, ..., 9000; 15 and above to 10000, 30000, ...
    """
    if idx < 5:
        return idx * 20 + 10
    elif idx < 10:
        return (idx - 5) * 200 + 100
    elif idx < 15:
        return (idx - 10) * 2000 + 1000
    else:
        # BUG FIX: the original returned ``(idx - 15) * 20000 * 10000``.
        # Every other tier follows "(idx - base) * step + start", and the
        # multiplication collapsed idx == 15 to 0 instead of 10000, so
        # the second ``*`` is a typo for ``+``.
        return (idx - 15) * 20000 + 10000

# Locate the best (fastest) measured chunksize for ENZYMES on the
# "myria" machine.  NOTE(review): ``csize``, ``mrlt`` and ``rlt`` are
# not used below — presumably kept for interactive inspection.
idx, mrlt, rlt = loadmin('test_parallel/myria/ENZYMES.npy')
csize = idx2chunksize2(idx)

# Per-dataset sizes (order: Acyclic, Alkane, MAO, PAH, MUTAG,
# Letter-med[, ENZYMES]) — presumably graph counts; TODO confirm.
# Converted to n*(n+1)/2, the number of Gram-matrix entries computed.
#dsize = np.array([183, 150, 68, 94, 188, 2250, 600])
dsize = np.array([183, 150, 68, 94, 188, 2250])
dsize = dsize * (dsize + 1) / 2
# Best chunksizes per dataset: "cs_sp" presumably for the shortest-path
# kernel, "cd_ssp" for the structural sp kernel, each measured on a
# laptop and on myria/CRIANN — TODO confirm the naming.
#cs_sp_laptop = [900, 400, 70, 900, 2000, 8000, 300]
#cs_sp_myria = [900, 500, 500, 300, 400, 4000, 300]
cs_sp_laptop = [900, 400, 70, 900, 2000, 8000]
cs_sp_myria = [900, 500, 500, 300, 400, 4000]
cd_ssp_laptop = [500, 700, 500, 70, 3000, 3000]
cd_ssp_myria = [100, 90, 700, 30, 3000, 5000]

# Re-order every series by ascending data size.  The zips capture the
# unsorted ``dsize``; it is sorted in place only afterwards, so the
# statement order here matters.
cs_sp_laptop = [x for _, x in sorted(zip(dsize, cs_sp_laptop))]
cs_sp_myria = [x for _, x in sorted(zip(dsize, cs_sp_myria))]
cd_ssp_laptop = [x for _, x in sorted(zip(dsize[0:6], cd_ssp_laptop))]
cd_ssp_myria = [x for _, x in sorted(zip(dsize[0:6], cd_ssp_myria))]
dsize.sort()
# Mean of the four measured series, per dataset.
cd_mean = np.mean([cs_sp_laptop[0:6], cs_sp_myria[0:6], cd_ssp_laptop, cd_ssp_myria],
                  axis=0)
# NOTE(review): np.append returns a new array rather than modifying
# ``cd_mean`` in place — this line would have had no effect as written.
#np.append(cd_mean, [6000])

fig, ax = plt.subplots()
# Alternative plots over all six datasets, kept for reference.
##p1 = ax.plot(dsize, cs_sp_laptop, 'o-', label='sp laptop')
#p2 = ax.plot(dsize, cs_sp_myria, 'o-', label='sp CRIANN')
##p3 = ax.plot(dsize[0:6], cd_ssp_laptop, 'o-', label='ssp laptop')
#p4 = ax.plot(dsize[0:6], cd_ssp_myria, 'o-', label='ssp CRIANN')
#p5 = ax.plot(dsize[0:6], cd_mean, 'o-', label='mean')

# Plot only the five smallest datasets.
#p1 = ax.plot(dsize[0:5], cs_sp_laptop[0:5], 'o-', label='sp laptop')
p2 = ax.plot(dsize[0:5], cs_sp_myria[0:5], 'o-', label='sp CRIANN')
#p3 = ax.plot(dsize[0:5], cd_ssp_laptop[0:5], 'o-', label='ssp laptop')
p4 = ax.plot(dsize[0:5], cd_ssp_myria[0:5], 'o-', label='ssp CRIANN')
p5 = ax.plot(dsize[0:5], cd_mean[0:5], 'o-', label='mean')


#ax.set_xscale('log', nonposx='clip')
#ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('data sizes')
ax.set_ylabel('chunksizes for the fastest computation')
ax.legend(loc='upper left')
plt.show()

+ 64
- 0
notebooks/draw_gm.py View File

@@ -0,0 +1,64 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Compare gram matrices
Created on Mon Sep 24 10:52:25 2018

@author: ljia
"""
import numpy as np
import matplotlib.pyplot as plt

# Number of datasets compared.
N = 7
# Smallest eigenvalue of each dataset's Gram matrix for the first
# implementation; made absolute below so it can be drawn on a log axis.
egmin = [-3.425001366427846e-15,
         -5.513191435356332e-15,
         -1.1563146193980238e-15,
         -1.3999833987183273e-15,
         -5.811474553224136e-15,
         2.196833029054622e-10,
         0.0001002381061317695]
egmin = np.abs(egmin)
# Same quantity for the second implementation (values match the "jstsp"
# entries in the check_gm results files); NaN marks combinations that
# were not computed — TODO confirm.
egmin2 = [-9.433792343294819e-15,
          np.NaN,
          -7.502900269338164e-16,
          -1.3999833987183273e-15,
          -8.73626400337456e-15,
          np.NaN,
          -4.04460628433013e-14]
egmin2 = np.abs(egmin2)
# Largest eigenvalues for both implementations (already positive).
egmax = [142.86649135778595,
         140.08307372708344,
         64.31844814063015,
         92.38382991977493,
         160.72585558445357,
         943.9175660197347,
         299.17895175532897]
egmax2 = [172.4203026547106,
          np.NaN,
          65.53092059526354,
          92.38382991977493,
          180.374192331094,
          np.NaN,
          529.3691973508182]

fig, ax = plt.subplots()

ind = np.arange(N)    # the x locations for the groups
width = 0.20          # the width of the bars: can also be len(x) sequence

# Stacked bars per dataset: |min eigenvalue| at the bottom with the max
# stacked on top; implementation 1 at ``ind``, 2 at ``ind + width``.
p1 = ax.bar(ind, egmin, width)
p2 = ax.bar(ind, egmax, width, bottom=egmin)
p3 = ax.bar(ind + width, egmin2, width)
p4 = ax.bar(ind + width, egmax2, width, bottom=egmin2)

# Log scale: the magnitudes span roughly 17 orders of magnitude.
ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('datasets')
ax.set_ylabel('absolute eigen values')
ax.set_title('Absolute eigen values of gram matrices on all datasets')
plt.xticks(ind + width / 2, ('Acyclic', 'Alkane', 'MAO', 'PAH', 'MUTAG', 'Letter-med', 'ENZYMES'))
#ax.set_yticks(np.logspace(-16, -3, num=20, base=10))
ax.set_ylim(bottom=1e-15)
ax.legend((p1[0], p2[0], p3[0], p4[0]), ('min1', 'max1', 'min2', 'max2'), loc='upper right')

plt.savefig('check_gm/compare_eigen_values.eps', format='eps', dpi=300)
plt.show()

+ 69
- 0
notebooks/draw_running_time.py View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Draw running time.
Created on Mon Sep 24 17:37:26 2018

@author: ljia
"""

import numpy as np
import matplotlib.pyplot as plt

# Number of datasets plotted.
N = 6
# Runtimes in seconds; judging by the bar labels below, "tgm" is the
# Gram-matrix computation time and "tms" the model-selection time, with
# series 1 measured on CRIANN and series 2 on a laptop — TODO confirm.
# The commented entries belong to a dataset that is skipped here.
tgm1 = [3.68,
        2.24,
        3.34,
        #        0,
        20.00,
        2020.46,
        3198.84]
tgm2 = [4.29,
        3.35,
        5.78,
        #        11.21,
        40.58,
        3136.26,
        17222.21]
tms1 = [51.19,
        73.09,
        5.01,
        #        0,
        22.87,
        2211.97,
        3211.58]
tms2 = [65.16,
        53.02,
        10.32,
        #        1162.41,
        49.86,
        3931.68,
        17270.55]

fig, ax = plt.subplots()

ind = np.arange(N)    # the x locations for the groups
width = 0.30          # the width of the bars: can also be len(x) sequence

# Stacked bars: Gram-matrix time at the bottom, model-selection time on
# top; CRIANN at ``ind``, laptop at ``ind + width``.
p1 = ax.bar(ind, tgm1, width, label='$t_{gm}$ CRIANN')
p2 = ax.bar(ind, tms1, width, bottom=tgm1, label='$t_{ms}$ CRIANN')
p3 = ax.bar(ind + width, tgm2, width, label='$t_{gm}$ laptop')
p4 = ax.bar(ind + width, tms2, width, bottom=tgm2, label='$t_{ms}$ laptop')

ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('datasets')
ax.set_ylabel('runtime($s$)')
ax.set_title('Runtime of the shortest path kernel on all datasets')
plt.xticks(ind + width / 2, ('Acyclic', 'Alkane', 'MAO', 'MUTAG', 'Letter-med', 'ENZYMES'))
#ax.set_yticks(np.logspace(-16, -3, num=20, base=10))
#ax.set_ylim(bottom=1e-15)
ax.legend(loc='upper left')

# Secondary axis: laptop / CRIANN runtime ratio for the Gram
# computation.  NOTE(review): this rebinds ``p1`` from the bar handle
# above — harmless here, but confusing.
ax2 = ax.twinx()
p1 = ax2.plot(ind + width / 2, np.array(tgm2) / np.array(tgm1), 'ro-',
              label='$t_{gm}$ laptop / $t_{gm}$ CRIANN')
ax2.set_ylabel('ratios')
ax2.legend(loc='upper center')

plt.savefig('check_gm/compare_running_time.eps', format='eps', dpi=300)
plt.show()

+ 2
- 2
notebooks/run_commonwalkkernel.ipynb View File

@@ -2341,8 +2341,8 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"display_name": "Python 3 (Spyder)",
"language": "python3",
"name": "python3"
},
"language_info": {


+ 85
- 0
notebooks/run_commonwalkkernel.py View File

@@ -0,0 +1,85 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 17:01:13 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing
from sklearn.metrics.pairwise import rbf_kernel

from pygraph.kernels.commonWalkKernel import commonwalkkernel
from pygraph.utils.kernels import deltakernel, kernelproduct

# Datasets to evaluate.  Each entry names the dataset file, optionally a
# separate target file ('dataset_y'), the learning task ('regression';
# classification when absent) and loader-specific 'extra_params'.
# Commented-out entries are kept for quick switching.
dslist = [
    {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
     'task': 'regression'},  # node symb
    {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },  # contains single node graph, node symb
    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', },  # node/edge symb
    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', },  # unlabeled
    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},  # node/edge symb
    {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    # node symb/nsymb
    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
    # node/edge symb
    # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
    #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},  # node symb

    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'},  # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},  # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},  # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},  # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},  # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},  # node symb/nsymb ,edge nsymb

    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},  # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},  # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = commonwalkkernel
# NOTE(review): ``mixkernel`` is defined but not referenced by
# ``param_grid_precomputed`` below — presumably kept for parity with the
# other run_* scripts.
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
# Kernel hyper-parameter grids: geometric common walk over decaying
# weights, and exponential common walk over integer weights.
param_grid_precomputed = [{'compute_method': ['geo'],
                           'weight': np.logspace(0, -10, num=11, base=10)},
                          {'compute_method': ['exp'], 'weight': range(0, 10)}]
# Predictor grids — presumably SVM 'C' for classification and
# kernel-ridge 'alpha' for regression; TODO confirm against libs.
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
              {'alpha': np.logspace(-10, 10, num=41, base=10)}]

# ``np`` and ``model_selection_for_precomputed_kernel`` come from
# ``from libs import *`` at the top of the file.
for ds in dslist:
    print()
    print(ds['name'])
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        # alpha grid for regression tasks, C grid otherwise
        (param_grid[1] if ('task' in ds and ds['task']
                           == 'regression') else param_grid[0]),
        (ds['task'] if 'task' in ds else 'classification'),
        NUM_TRIALS=30,
        datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
        extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()

+ 2
- 2
notebooks/run_marginalizedkernel.ipynb View File

@@ -1637,8 +1637,8 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"display_name": "Python 3 (Spyder)",
"language": "python3",
"name": "python3"
},
"language_info": {


+ 81
- 0
notebooks/run_marginalizedkernel.py View File

@@ -0,0 +1,81 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 18:58:47 2018

@author: ljia
"""

from libs import *
import multiprocessing

from pygraph.kernels.marginalizedKernel import marginalizedkernel

# Datasets to evaluate.  Each entry names the dataset file, optionally a
# separate target file ('dataset_y'), the learning task ('regression';
# classification when absent) and loader-specific 'extra_params'.
# Commented-out entries are kept for quick switching.
dslist = [
    {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
     'task': 'regression'},  # node symb
    {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },  # contains single node graph, node symb
    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', },  # node/edge symb
    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', },  # unlabeled
    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},  # node/edge symb
    {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    # node symb/nsymb
    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
    # node/edge symb
    # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
    #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},  # node symb

    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'},  # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},  # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},  # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},  # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},  # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},  # node symb/nsymb ,edge nsymb

    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},  # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},  # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = marginalizedkernel
# Kernel hyper-parameter grid: termination probability 'p_quit', number
# of iterations, and whether to remove tottering walks.
# NOTE(review): np.linspace yields float n_iteration values — confirm
# the kernel accepts non-integer iteration counts.
param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9),
                          'n_iteration': np.linspace(2, 20, 10),
                          'remove_totters': [False]}
# Predictor grids — presumably SVM 'C' for classification and
# kernel-ridge 'alpha' for regression; TODO confirm against libs.
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
              {'alpha': np.logspace(-10, 10, num=41, base=10)}]

# ``np`` and ``model_selection_for_precomputed_kernel`` come from
# ``from libs import *`` at the top of the file.
for ds in dslist:
    print()
    print(ds['name'])
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        # alpha grid for regression tasks, C grid otherwise
        (param_grid[1] if ('task' in ds and ds['task']
                           == 'regression') else param_grid[0]),
        (ds['task'] if 'task' in ds else 'classification'),
        NUM_TRIALS=30,
        datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
        extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()

+ 2
- 2
notebooks/run_pathkernel.ipynb View File

@@ -1039,8 +1039,8 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"display_name": "Python 3 (Spyder)",
"language": "python3",
"name": "python3"
},
"language_info": {


+ 14
- 100
notebooks/run_spkernel.py View File

@@ -1,27 +1,25 @@
# %load_ext line_profiler
# %matplotlib inline
import functools
from libs import *
import multiprocessing
from sklearn.metrics.pairwise import rbf_kernel

from pygraph.kernels.spKernel import spkernel, spkernel_do
from pygraph.kernels.spKernel import spkernel
from pygraph.utils.kernels import deltakernel, kernelproduct
from pygraph.utils.model_selection_precomputed import trial_do
#from pygraph.utils.model_selection_precomputed import trial_do

dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'task': 'regression'}, # node symb
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
'task': 'regression'}, # node symb
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# # node symb/nsymb
# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# # node/edge symb
# node symb/nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node/edge symb
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb
@@ -57,7 +55,7 @@ estimator = spkernel
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
param_grid_precomputed = {'node_kernels': [
{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
param_grid = [{'C': np.logspace(-10, 3, num=27, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
@@ -76,88 +74,4 @@ for ds in dslist:
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)

# %lprun -f trial_do -f spkernel -f spkernel_do -f model_selection_for_precomputed_kernel \
# model_selection_for_precomputed_kernel( \
# ds['dataset'], \
# estimator, \
# param_grid_precomputed, \
# (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \
# (ds['task'] if 'task' in ds else 'classification'), \
# NUM_TRIALS=30, \
# datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), \
# extra_params=(ds['extra_params'] if 'extra_params' in ds else None), \
# ds_name=ds['name'], \
# n_jobs=multiprocessing.cpu_count())
print()

# import functools
# from libs import *
# from pygraph.kernels.spKernel import spkernel
# from pygraph.utils.kernels import deltakernel, kernelsum
# from sklearn.metrics.pairwise import rbf_kernel

# dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', 'task': 'regression'}, # node symb
# # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
# # {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',}, # unlabeled
# # {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',}, # node/edge symb
# # {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
# # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
# # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# # 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',}, # contains single node graph, node symb
# # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, # node/edge symb
# # {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
# # {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb

# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
# # 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb
# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
# # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
# # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
# # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
# # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
# # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
# # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb

# # # not working below
# # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
# # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
# # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
# # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
# ]
# estimator = spkernel
# mixkernel = functools.partial(kernelsum, deltakernel, rbf_kernel)
# param_grid_precomputed = {'node_kernels': [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}
# param_grid = [{'C': np.logspace(-10, 10, num = 41, base = 10)},
# {'alpha': np.logspace(-10, 10, num = 41, base = 10)}]

# for ds in dslist:
# print()
# print(ds['name'])
# model_selection_for_precomputed_kernel(
# ds['dataset'], estimator, param_grid_precomputed,
# (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]),
# (ds['task'] if 'task' in ds else 'classification'), NUM_TRIALS=30,
# datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
# extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
# ds_name=ds['name'])

# # %lprun -f spkernel \
# # model_selection_for_precomputed_kernel( \
# # ds['dataset'], estimator, param_grid_precomputed, \
# # (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \
# # (ds['task'] if 'task' in ds else 'classification'), NUM_TRIALS=30, \
# # datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), \
# # extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
# print()

+ 86
- 0
notebooks/run_structuralspkernel.py View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 16:37:29 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing
from sklearn.metrics.pairwise import rbf_kernel

from pygraph.kernels.structuralspKernel import structuralspkernel
from pygraph.utils.kernels import deltakernel, kernelproduct

# Datasets to evaluate.  Each entry names the dataset file, optionally a
# separate target file ('dataset_y'), the learning task ('regression';
# classification when absent) and loader-specific 'extra_params'.
# Commented-out entries are kept for quick switching.
dslist = [
    # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
    #  'task': 'regression'},  # node symb
    # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
    #  'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },  # contains single node graph, node symb
    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', },  # node/edge symb
    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', },  # unlabeled
    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},  # node/edge symb
    {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    # node symb/nsymb
    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
    # node/edge symb
    {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},  # node symb

    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'},  # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},  # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},  # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},  # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},  # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},  # node symb/nsymb ,edge nsymb

    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},  # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},  # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = structuralspkernel
# Product of a delta kernel (symbolic labels) and an RBF kernel
# (non-symbolic labels), used for nodes whose labels mix both kinds.
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
# One sub-kernel set each for node labels and edge labels: delta for
# symbolic, RBF for non-symbolic, the product for mixed.
param_grid_precomputed = {'node_kernels':
                          [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}],
                          'edge_kernels':
                          [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}
# Predictor grids — presumably SVM 'C' for classification and
# kernel-ridge 'alpha' for regression; TODO confirm against libs.
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
              {'alpha': np.logspace(-10, 10, num=41, base=10)}]

# ``np`` and ``model_selection_for_precomputed_kernel`` come from
# ``from libs import *`` at the top of the file.
for ds in dslist:
    print()
    print(ds['name'])
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        # alpha grid for regression tasks, C grid otherwise
        (param_grid[1] if ('task' in ds and ds['task']
                           == 'regression') else param_grid[0]),
        (ds['task'] if 'task' in ds else 'classification'),
        NUM_TRIALS=30,
        datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
        extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()

+ 2
- 2
notebooks/run_untilhpathkernel.ipynb View File

@@ -5613,8 +5613,8 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"display_name": "Python 3 (Spyder)",
"language": "python3",
"name": "python3"
},
"language_info": {


+ 84
- 0
notebooks/run_untilhpathkernel.py View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 5 19:19:33 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing
from sklearn.metrics.pairwise import rbf_kernel

from pygraph.kernels.untilHPathKernel import untilhpathkernel
from pygraph.utils.kernels import deltakernel, kernelproduct

# Datasets to evaluate. Entries without a 'task' key are treated as
# classification; 'extra_params' is forwarded to the dataset loader.
dslist = [
    # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
    #  'task': 'regression'}, # node symb
    # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
    #  'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb
    # {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
    # {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
    # {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
    #  'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
    # {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    # # node symb/nsymb
    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
    # node/edge symb
    # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
    #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb

    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb

    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = untilhpathkernel
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
# NOTE(review): np.linspace(7, 10, 10) yields non-integer depths
# (7.0, 7.33, ...); if 'depth' must be an integer path length, this looks
# like it should be integer-valued (e.g. range(1, 11)) — confirm against
# untilhpathkernel's expectations.
# NOTE(review): `np` is not imported in this file explicitly — presumably
# provided by `from libs import *`; verify.
param_grid_precomputed = {'depth': np.linspace(7, 10, 10),
                          'k_func': ['tanimoto', 'MinMax']}
# param_grid[0]: SVM C for classification; param_grid[1]: ridge alpha for regression.
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
              {'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
    print()
    print(ds['name'])
    # Datasets without a 'task' key default to classification.
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        (param_grid[1] if ('task' in ds and ds['task']
                           == 'regression') else param_grid[0]),
        (ds['task'] if 'task' in ds else 'classification'),
        NUM_TRIALS=30,
        datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
        extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()

+ 26
- 1
notebooks/test_networkx.ipynb View File

@@ -2,6 +2,31 @@
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(1, {}), (2, {}), (3, {})]\n",
"[(1, 2, {}), (2, 1, {}), (3, 1, {})]\n",
"{2: {}}\n"
]
}
],
"source": [
"import networkx as nx\n",
"dg = nx.DiGraph()\n",
"dg.add_nodes_from([1, 2, 3])\n",
"dg.add_edges_from([(1, 2), (2, 1), (3, 1)])\n",
"print(dg.nodes(data=True))\n",
"print(dg.edges(data=True))\n",
"print(dg[1])"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
@@ -489,7 +514,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.6.5"
}
},
"nbformat": 4,


+ 690
- 0
notebooks/test_parallel.py View File

@@ -0,0 +1,690 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test of parallelization: find the best parallel chunksize and iteration separation scheme.
Created on Wed Sep 26 12:09:34 2018

@author: ljia
"""

import sys
import time
from itertools import combinations_with_replacement, product, combinations
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm
import networkx as nx
import numpy as np
import functools
from libs import *
import multiprocessing
from sklearn.metrics.pairwise import rbf_kernel
from matplotlib import pyplot as plt
from sklearn.model_selection import ParameterGrid

sys.path.insert(0, "../")
from pygraph.utils.utils import getSPGraph, direct_product
from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.kernels import deltakernel, kernelproduct


def spkernel(*args,
             node_label='atom',
             edge_weight=None,
             node_kernels=None,
             n_jobs=None,
             chunksize=1):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as the symbolic node label. Default 'atom'.
    edge_weight : string
        Edge attribute name corresponding to the edge weight.
    node_kernels: dict
        A dictionary of kernel functions for nodes, including 3 items: 'symb'
        for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
        for both labels. The first 2 functions take two node labels as
        parameters, and the 'mix' function takes 4 parameters, a symbolic and
        a non-symbolic label for each of the two nodes. Each label is in form
        of 2-D dimension array (n_samples, n_features). Each function returns
        a number as the kernel value. Ignored when nodes are unlabeled.
    n_jobs : int
        Number of worker processes for the multiprocessing Pool.
    chunksize : int
        Chunk size handed to Pool.imap_unordered.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the sp kernel between 2 graphs.
    run_time : float
        Wall-clock seconds spent building the matrix.
    idx : list
        Indices (into the input list) of the graphs kept, i.e. those with at
        least one edge.
    """
    # pre-process: accept either a list of graphs or exactly two graphs.
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]

    # Only honor edge_weight when the first graph carries a numeric attribute
    # of that name; otherwise silently fall back to unweighted shortest paths.
    weight = None
    if edge_weight is None:
        pass
    else:
        try:
            some_weight = list(
                nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
            if isinstance(some_weight, (float, int)):
                weight = edge_weight
        except:
            pass
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'node_attr_dim', 'is_directed'],
        node_label=node_label)

    # remove graphs with no edges, as no sp can be found in their structures,
    # so the kernel between such a graph and itself would be zero.
    len_gn = len(Gn)
    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
    idx = [G[0] for G in Gn]
    Gn = [G[1] for G in Gn]
    if len(Gn) != len_gn:
        print('\n %d graphs are removed as they don\'t contain edges.\n' %
              (len_gn - len(Gn)))

    start_time = time.time()

    pool = Pool(n_jobs)
    # get shortest path graphs of Gn (in parallel, results arrive unordered
    # so each worker returns its index alongside the graph).
    getsp_partial = partial(wrap_getSPGraph, Gn, weight)
    for i, g in tqdm(
            pool.imap_unordered(getsp_partial, range(0, len(Gn)), chunksize),
            desc='getting sp graphs',
            file=sys.stdout):
        Gn[i] = g

    Kmatrix = np.zeros((len(Gn), len(Gn)))

    # ---- use pool.imap_unordered to parallel and track progress. ----
    do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels)
    itr = combinations_with_replacement(range(0, len(Gn)), 2)
    # len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
    # if len_itr < 100:
    #     chunksize, extra = divmod(len_itr, n_jobs * 4)
    #     if extra:
    #         chunksize += 1
    # else:
    #     chunksize = 300
    for i, j, kernel in tqdm(
            pool.imap_unordered(do_partial, itr, chunksize),
            desc='calculating kernels',
            file=sys.stdout):
        Kmatrix[i][j] = kernel
        Kmatrix[j][i] = kernel
    pool.close()
    pool.join()

    run_time = time.time() - start_time
    print(
        "\n --- shortest path kernel matrix of size %d built in %s seconds ---"
        % (len(Gn), run_time))

    return Kmatrix, run_time, idx


def spkernel_do(Gn, ds_attrs, node_label, node_kernels, ij):
    """Compute the shortest-path kernel between the pair Gn[ij[0]], Gn[ij[1]].

    Returns (i, j, kernel) so the caller can place results from an unordered
    worker pool back into the kernel matrix. Missing labels/attributes make
    the pair's kernel silently stay 0 (see the KeyError handler).
    """
    i = ij[0]
    j = ij[1]
    g1 = Gn[i]
    g2 = Gn[j]
    Kmatrix = 0  # scalar kernel value for this single pair

    try:
        # compute node-kernel matrices first, method borrowed from FCSP.
        if ds_attrs['node_labeled']:
            # node symb and non-symb labeled
            if ds_attrs['node_attr_dim'] > 0:
                kn = node_kernels['mix']
                vk_dict = {}  # node kernel values dict
                for n1, n2 in product(
                        g1.nodes(data=True), g2.nodes(data=True)):
                    vk_dict[(n1[0], n2[0])] = kn(
                        n1[1][node_label], n2[1][node_label],
                        [n1[1]['attributes']], [n2[1]['attributes']])
            # node symb labeled
            else:
                kn = node_kernels['symb']
                vk_dict = {}
                for n1 in g1.nodes(data=True):
                    for n2 in g2.nodes(data=True):
                        vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
                                                     n2[1][node_label])
        else:
            # node non-symb labeled
            if ds_attrs['node_attr_dim'] > 0:
                kn = node_kernels['nsymb']
                vk_dict = {}
                for n1 in g1.nodes(data=True):
                    for n2 in g2.nodes(data=True):
                        vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']],
                                                     [n2[1]['attributes']])
            # node unlabeled: the kernel is just the count of edge pairs with
            # equal shortest-path cost; no node kernel matrix is needed.
            # (Consistency fix: use the already-bound g1/g2 instead of
            # re-indexing Gn[i]/Gn[j] as the original did.)
            else:
                for e1, e2 in product(
                        g1.edges(data=True), g2.edges(data=True)):
                    if e1[2]['cost'] == e2[2]['cost']:
                        Kmatrix += 1
                return i, j, Kmatrix

        # compute graph kernels from the node-kernel matrix
        if ds_attrs['is_directed']:
            for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
                if e1[2]['cost'] == e2[2]['cost']:
                    # each edge walk is counted twice, starting from both its extreme nodes.
                    nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1],
                                                                   e2[1])]
                    kn1 = nk11 * nk22
                    Kmatrix += kn1
        else:
            for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
                if e1[2]['cost'] == e2[2]['cost']:
                    # each edge walk is counted twice, starting from both its extreme nodes.
                    nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
                        e1[0], e2[1])], vk_dict[(e1[1],
                                                 e2[0])], vk_dict[(e1[1],
                                                                   e2[1])]
                    kn1 = nk11 * nk22
                    kn2 = nk12 * nk21
                    Kmatrix += kn1 + kn2

    except KeyError:  # missing labels or attributes: best-effort, keep 0
        pass

    return i, j, Kmatrix


def wrap_getSPGraph(Gn, weight, i):
    """Pickle-friendly worker: build the shortest-path graph of Gn[i] and
    return it tagged with its index so unordered pool results can be placed."""
    sp_graph = getSPGraph(Gn[i], edge_weight=weight)
    return i, sp_graph


def commonwalkkernel(*args,
                     node_label='atom',
                     edge_label='bond_type',
                     n=None,
                     weight=1,
                     compute_method=None,
                     n_jobs=None,
                     chunksize=1):
    """Calculate common walk graph kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph (or G1, G2 as two positional args)
        Graphs between which the kernels are calculated.
    node_label : string
        Node attribute used as label. Default 'atom'.
    edge_label : string
        Edge attribute used as label. Default 'bond_type'.
    n : int
        Unused here; kept for interface compatibility.
    weight : float
        Series weight: beta for 'exp', gamma for 'geo'.
    compute_method : string
        'exp' for the exponential series or 'geo' for the geometric series.
    n_jobs, chunksize : int
        Multiprocessing pool size and imap chunk size.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix.
    run_time : float
        Wall-clock seconds spent building the matrix.

    Raises
    ------
    ValueError
        If compute_method is not 'exp' or 'geo'.
    """
    # Validate up front. The original bound do_partial only for 'exp'/'geo'
    # and otherwise crashed with a NameError deep inside the pool loop.
    if compute_method is None:
        raise ValueError("compute_method must be 'exp' or 'geo', got None.")
    compute_method = compute_method.lower()
    if compute_method not in ('exp', 'geo'):
        raise ValueError(
            "compute_method must be 'exp' or 'geo', got %r." % compute_method)

    # arrange all graphs in a list
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    Kmatrix = np.zeros((len(Gn), len(Gn)))
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
        node_label=node_label,
        edge_label=edge_label)
    # give unlabeled graphs a constant label so the direct product is defined
    if not ds_attrs['node_labeled']:
        for G in Gn:
            nx.set_node_attributes(G, '0', 'atom')
    if not ds_attrs['edge_labeled']:
        for G in Gn:
            nx.set_edge_attributes(G, '0', 'bond_type')
    if not ds_attrs['is_directed']:  # convert to directed for walk counting
        Gn = [G.to_directed() for G in Gn]

    start_time = time.time()

    # ---- use pool.imap_unordered to parallel and track progress. ----
    pool = Pool(n_jobs)
    itr = combinations_with_replacement(range(0, len(Gn)), 2)

    # direct product graph method - exponential
    if compute_method == 'exp':
        do_partial = partial(_commonwalkkernel_exp, Gn, node_label, edge_label,
                             weight)
    # direct product graph method - geometric
    else:
        do_partial = partial(_commonwalkkernel_geo, Gn, node_label, edge_label,
                             weight)

    for i, j, kernel in tqdm(
            pool.imap_unordered(do_partial, itr, chunksize),
            desc='calculating kernels',
            file=sys.stdout):
        Kmatrix[i][j] = kernel
        Kmatrix[j][i] = kernel
    pool.close()
    pool.join()

    run_time = time.time() - start_time
    print(
        "\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---"
        % (len(Gn), run_time))

    return Kmatrix, run_time


def _commonwalkkernel_exp(Gn, node_label, edge_label, beta, ij):
    """Calculate walk graph kernels up to n between 2 graphs using exponential
    series.

    Returns (i, j, value) so the caller can place the result into the kernel
    matrix.
    """
    i = ij[0]
    j = ij[1]
    g1 = Gn[i]
    g2 = Gn[j]

    # get tensor product / direct product
    gp = direct_product(g1, g2, node_label, edge_label)
    A = nx.adjacency_matrix(gp).todense()

    # exp(beta*A) via eigendecomposition: V diag(exp(beta*ew)) V^T.
    # BUGFIX: the original reused `i` as the eigenvalue loop index, clobbering
    # the graph index and returning a wrong (i, j) pair to the caller, which
    # then wrote the kernel into the wrong matrix cell.
    ew, ev = np.linalg.eig(A)
    D = np.zeros((len(ew), len(ew)))
    for k in range(len(ew)):
        D[k][k] = np.exp(beta * ew[k])
    exp_D = ev * D * ev.T

    return i, j, exp_D.sum()


def _commonwalkkernel_geo(Gn, node_label, edge_label, gamma, ij):
    """Calculate common walk graph kernels up to n between 2 graphs using
    geometric series.

    Returns (i, j, value); value is NaN when (I - gamma*A) is singular.
    """
    i, j = ij

    # tensor product / direct product of the pair
    gp = direct_product(Gn[i], Gn[j], node_label, edge_label)
    adj = nx.adjacency_matrix(gp).todense()

    # geometric series sum == elements of the resolvent (I - gamma*A)^-1
    resolvent = np.identity(len(adj)) - gamma * adj
    try:
        return i, j, resolvent.I.sum()
    except np.linalg.LinAlgError:
        return i, j, np.nan


def compute_gram_matrices(datafile,
                          estimator,
                          param_grid_precomputed,
                          datafile_y=None,
                          extra_params=None,
                          ds_name='ds-unknown',
                          n_jobs=1,
                          chunksize=1):
    """Compute a gram matrix per parameter combination and return the mean
    computation time over all combinations.

    Parameters
    ----------
    datafile : string
        Path of dataset file.
    estimator : function
        kernel function used to estimate. This function needs to return a gram matrix.
    param_grid_precomputed : dictionary
        Dictionary with names (string) of parameters used to calculate gram
        matrices as keys and lists of parameter settings to try as values.
        This enables searching over any sequence of parameter settings.
        Params with length 1 will be omitted.
    datafile_y : string
        Path of file storing y data. This parameter is optional depending on
        the given dataset file.
    extra_params : dict
        Extra parameters forwarded to the dataset loader.
    ds_name : string
        Dataset name (bookkeeping only).
    n_jobs : int
        Worker-process count forwarded to the estimator.
    chunksize : int
        Parallel chunk size forwarded to the estimator.

    Return
    ------
    average_gram_matrix_time : float
        Mean wall-clock seconds over all parameter combinations.
    """
    tqdm.monitor_interval = 0

    # Load the dataset
    dataset, y = loadDataset(
        datafile, filename_y=datafile_y, extra_params=extra_params)

    # Grid of parameters with a discrete number of values for each.
    param_list_precomputed = list(ParameterGrid(param_grid_precomputed))

    gram_matrix_time = []  # a list to store time to calculate gram matrices

    # calculate all gram matrices
    for params_idx, params_out in enumerate(param_list_precomputed):
        params_out['n_jobs'] = n_jobs
        params_out['chunksize'] = chunksize
        rtn_data = estimator(dataset, **params_out)
        Kmatrix = rtn_data[0]
        current_run_time = rtn_data[1]
        # for some kernels, some graphs in datasets may not meet the
        # kernels' requirements for graph structure. These graphs are trimmed.
        if len(rtn_data) == 3:
            idx_trim = rtn_data[2]  # the index of trimmed graph list
            y = [y[idx] for idx in idx_trim]  # trim y accordingly

        Kmatrix_diag = Kmatrix.diagonal().copy()
        # remove graphs whose kernels with themselves are zeros
        nb_g_ignore = 0
        for idx, diag in enumerate(Kmatrix_diag):
            if diag == 0:
                Kmatrix = np.delete(Kmatrix, (idx - nb_g_ignore), axis=0)
                Kmatrix = np.delete(Kmatrix, (idx - nb_g_ignore), axis=1)
                nb_g_ignore += 1
        # BUGFIX: drop the zero entries from the diagonal vector as well so
        # it stays index-aligned with the trimmed Kmatrix; the original kept
        # the full-length diagonal and normalized by misaligned (possibly
        # zero) entries whenever any graph had been removed above.
        Kmatrix_diag = Kmatrix_diag[Kmatrix_diag != 0]
        # normalization: K[i][j] /= sqrt(K[i][i] * K[j][j])
        for i in range(len(Kmatrix)):
            for j in range(i, len(Kmatrix)):
                Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
                Kmatrix[j][i] = Kmatrix[i][j]

        gram_matrix_time.append(current_run_time)

    average_gram_matrix_time = np.mean(gram_matrix_time)

    return average_gram_matrix_time


def structuralspkernel(*args,
                       node_label='atom',
                       edge_weight=None,
                       edge_label='bond_type',
                       node_kernels=None,
                       edge_kernels=None,
                       n_jobs=None,
                       chunksize=1):
    """Calculate mean average structural shortest path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph (or G1, G2 as two positional args)
        Graphs between which the kernels are calculated.
    node_label : string
        Node attribute used as the symbolic node label. Default 'atom'.
    edge_weight : string
        Edge attribute name corresponding to the edge weight; falls back to
        unweighted shortest paths if absent or non-numeric.
    edge_label : string
        Edge attribute used as the symbolic edge label. Default 'bond_type'.
    node_kernels, edge_kernels : dict
        Dicts of kernel functions with keys 'symb', 'nsymb' and 'mix'
        (see spkernel for the calling conventions).
    n_jobs : int
        Number of worker processes for the multiprocessing Pool.
    chunksize : int
        Chunk size handed to Pool.imap_unordered.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix.
    run_time : float
        Wall-clock seconds spent building the matrix.
    """
    # pre-process: accept either a list of graphs or exactly two graphs.
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]

    # Only honor edge_weight when the first graph carries a numeric attribute
    # of that name; otherwise fall back to unweighted shortest paths.
    weight = None
    if edge_weight is None:
        print('\n None edge weight specified. Set all weight to 1.\n')
    else:
        try:
            some_weight = list(
                nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
            if isinstance(some_weight, (float, int)):
                weight = edge_weight
            else:
                print(
                    '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n'
                    % edge_weight)
        except:
            print(
                '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n'
                % edge_weight)
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
                    'edge_attr_dim', 'is_directed'],
        node_label=node_label, edge_label=edge_label)

    start_time = time.time()

    # get shortest paths of each graph in Gn (in parallel; workers return
    # their index so unordered results can be slotted in).
    splist = [[] for _ in range(len(Gn))]
    pool = Pool(n_jobs)
    getsp_partial = partial(wrap_getSP, Gn, weight, ds_attrs['is_directed'])
    # if len(Gn) < 100:
    #     # use default chunksize as pool.map when iterable is less than 100
    #     chunksize, extra = divmod(len(Gn), n_jobs * 4)
    #     if extra:
    #         chunksize += 1
    # else:
    #     chunksize = 100
    # chunksize = 300  # int(len(list(itr)) / n_jobs)
    for i, sp in tqdm(
            pool.imap_unordered(getsp_partial, range(0, len(Gn)), chunksize),
            desc='getting shortest paths',
            file=sys.stdout):
        splist[i] = sp

    Kmatrix = np.zeros((len(Gn), len(Gn)))

    # ---- use pool.imap_unordered to parallel and track progress. ----
    do_partial = partial(structuralspkernel_do, Gn, splist, ds_attrs,
                         node_label, edge_label, node_kernels, edge_kernels)
    itr = combinations_with_replacement(range(0, len(Gn)), 2)
    # len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
    # if len_itr < 100:
    #     chunksize, extra = divmod(len_itr, n_jobs * 4)
    #     if extra:
    #         chunksize += 1
    # else:
    #     chunksize = 100
    for i, j, kernel in tqdm(
            pool.imap_unordered(do_partial, itr, chunksize),
            desc='calculating kernels',
            file=sys.stdout):
        Kmatrix[i][j] = kernel
        Kmatrix[j][i] = kernel
    pool.close()
    pool.join()

    run_time = time.time() - start_time
    print(
        "\n --- shortest path kernel matrix of size %d built in %s seconds ---"
        % (len(Gn), run_time))

    return Kmatrix, run_time


def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
                          node_kernels, edge_kernels, ij):
    """Compute the mean-average structural shortest-path kernel between
    Gn[ij[0]] and Gn[ij[1]], given their precomputed shortest-path lists.

    Returns (iglobal, jglobal, kernel) so results from an unordered pool can
    be placed back into the kernel matrix. Missing labels/attributes leave
    the pair's kernel at 0 (see the KeyError handler).
    """
    iglobal = ij[0]
    jglobal = ij[1]
    g1 = Gn[iglobal]
    g2 = Gn[jglobal]
    spl1 = splist[iglobal]
    spl2 = splist[jglobal]
    kernel = 0

    try:
        # First, compute node kernel matrices, method borrowed from FCSP.
        if ds_attrs['node_labeled']:
            # node symb and non-symb labeled
            if ds_attrs['node_attr_dim'] > 0:
                kn = node_kernels['mix']
                vk_dict = {}  # node kernel values dict
                for n1, n2 in product(
                        g1.nodes(data=True), g2.nodes(data=True)):
                    vk_dict[(n1[0], n2[0])] = kn(
                        n1[1][node_label], n2[1][node_label],
                        [n1[1]['attributes']], [n2[1]['attributes']])
            # node symb labeled
            else:
                kn = node_kernels['symb']
                vk_dict = {}
                for n1 in g1.nodes(data=True):
                    for n2 in g2.nodes(data=True):
                        vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
                                                     n2[1][node_label])
        else:
            # node non-symb labeled
            if ds_attrs['node_attr_dim'] > 0:
                kn = node_kernels['nsymb']
                vk_dict = {}
                for n1 in g1.nodes(data=True):
                    for n2 in g2.nodes(data=True):
                        vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']],
                                                     [n2[1]['attributes']])
            # node unlabeled
            else:
                vk_dict = {}

        # Then, compute kernels between all pairs of edges, which idea is an
        # extension of FCSP. It suits sparse graphs, which is the most case we
        # went through. For dense graphs, it would be slow.
        # NOTE(review): ek_dict is keyed by the (u, v) orientation of each
        # stored edge; a path traversing an edge in the reverse direction
        # raises KeyError and zeroes the whole pair — confirm intended.
        if ds_attrs['edge_labeled']:
            # edge symb and non-symb labeled
            if ds_attrs['edge_attr_dim'] > 0:
                ke = edge_kernels['mix']
                ek_dict = {}  # dict of edge kernels
                for e1, e2 in product(
                        g1.edges(data=True), g2.edges(data=True)):
                    ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ke(
                        e1[2][edge_label], e2[2][edge_label],
                        [e1[2]['attributes']], [e2[2]['attributes']])
            # edge symb labeled
            else:
                ke = edge_kernels['symb']
                ek_dict = {}
                for e1 in g1.edges(data=True):
                    for e2 in g2.edges(data=True):
                        ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ke(
                            e1[2][edge_label], e2[2][edge_label])
        else:
            # edge non-symb labeled
            if ds_attrs['edge_attr_dim'] > 0:
                ke = edge_kernels['nsymb']
                ek_dict = {}
                for e1 in g1.edges(data=True):
                    for e2 in g2.edges(data=True):
                        # BUGFIX: the original called kn(...) here — the node
                        # kernel, unbound when nodes are unlabeled (NameError)
                        # and the wrong function otherwise. Use the edge
                        # kernel ke selected above.
                        ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ke(
                            [e1[2]['attributes']], [e2[2]['attributes']])
            # edge unlabeled
            else:
                ek_dict = {}

        # compute graph kernels over all pairs of equal-length paths
        if vk_dict:
            if ek_dict:
                for p1, p2 in product(spl1, spl2):
                    if len(p1) == len(p2):
                        kpath = vk_dict[(p1[0], p2[0])]
                        if kpath:
                            for idx in range(1, len(p1)):
                                kpath *= vk_dict[(p1[idx], p2[idx])] * \
                                    ek_dict[((p1[idx-1], p1[idx]),
                                             (p2[idx-1], p2[idx]))]
                                if not kpath:
                                    break  # path kernel hit 0; stop early
                            kernel += kpath  # add up kernels of all paths
            else:
                for p1, p2 in product(spl1, spl2):
                    if len(p1) == len(p2):
                        kpath = vk_dict[(p1[0], p2[0])]
                        if kpath:
                            for idx in range(1, len(p1)):
                                kpath *= vk_dict[(p1[idx], p2[idx])]
                                if not kpath:
                                    break
                            kernel += kpath  # add up kernels of all paths
        else:
            if ek_dict:
                for p1, p2 in product(spl1, spl2):
                    if len(p1) == len(p2):
                        if len(p1) == 0:
                            kernel += 1
                        else:
                            kpath = 1
                            for idx in range(0, len(p1) - 1):
                                kpath *= ek_dict[((p1[idx], p1[idx+1]),
                                                  (p2[idx], p2[idx+1]))]
                                if not kpath:
                                    break
                            kernel += kpath  # add up kernels of all paths
            else:
                for p1, p2 in product(spl1, spl2):
                    if len(p1) == len(p2):
                        kernel += 1

        kernel = kernel / (len(spl1) * len(spl2))  # calculate mean average
    except KeyError:  # missing labels or attributes: best-effort, keep 0
        pass

    return iglobal, jglobal, kernel


def get_shortest_paths(G, weight, directed):
    """Collect all shortest paths of a graph, one per connected node pair.

    For undirected graphs each path is also stored reversed, so every walk is
    counted once from each of its extreme nodes. Unconnected pairs are
    skipped, and every single node is appended as a length-0 path.
    """
    paths = []
    for u, v in combinations(G.nodes(), 2):
        try:
            path = nx.shortest_path(G, u, v, weight=weight)
        except nx.NetworkXNoPath:  # nodes not connected
            continue
        paths.append(path)
        # each edge walk is counted twice, starting from both its extreme nodes.
        if not directed:
            paths.append(path[::-1])
    # add single nodes as length 0 paths.
    paths.extend([n] for n in G.nodes())
    return paths


def wrap_getSP(Gn, weight, directed, i):
    """Pickle-friendly worker: return (i, shortest paths of Gn[i]) so
    unordered pool results can be slotted back by index."""
    splist = get_shortest_paths(Gn[i], weight, directed)
    return i, splist



# Datasets to benchmark. 'task' defaults to classification when absent;
# 'extra_params' is forwarded to the .mat loader.
dslist = [
    {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
     'task': 'regression'},  # node symb
    {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },  # contains single node graph, node symb
    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', },  # node/edge symb
    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', },  # unlabeled
    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},  # node/edge symb
    {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    # node symb/nsymb
    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
    # node/edge symb
    {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},  # node symb
]

# One log-log figure: runtime vs. chunksize, one curve per dataset.
fig, ax = plt.subplots()
ax.set_xscale('log', nonposx='clip')
ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('parallel chunksize')
ax.set_ylabel('runtime($s$)')
ax.set_title('Runtime of the sp kernel on all datasets V.S. parallel chunksize')

# Pick the kernel under test and its precomputed-parameter grid by name.
estimator = structuralspkernel
if estimator.__name__ == 'spkernel':
    mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
    param_grid_precomputed = {'node_kernels': [
        {'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}

elif estimator.__name__ == 'commonwalkkernel':
    mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
    param_grid_precomputed = {'compute_method': ['geo'],
                              'weight': [1]}
elif estimator.__name__ == 'structuralspkernel':
    mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
    param_grid_precomputed = {'node_kernels':
        [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}],
        'edge_kernels':
        [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}

# Chunk sizes to sweep, roughly log-spaced from 10 to 90000.
#list(range(10, 100, 20)) +
chunklist = list(range(10, 100, 20)) + list(range(100, 1000, 200)) + \
    list(range(1000, 10000, 2000)) + list(range(10000, 100000, 20000))
# chunklist = list(range(300, 1000, 200)) + list(range(1000, 10000, 2000)) + list(range(10000, 100000, 20000))
# gmtmat[d][c]: average gram-matrix time of dataset d at chunksize c.
gmtmat = np.zeros((len(dslist), len(chunklist)))

for idx1, ds in enumerate(dslist):
    print()
    print(ds['name'])

    for idx2, cs in enumerate(chunklist):
        print(ds['name'], idx2, cs)
        gmtmat[idx1][idx2] = compute_gram_matrices(
            ds['dataset'],
            estimator,
            param_grid_precomputed,
            datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
            extra_params=(ds['extra_params']
                          if 'extra_params' in ds else None),
            ds_name=ds['name'],
            n_jobs=multiprocessing.cpu_count(),
            chunksize=cs)

    print()
    print(gmtmat[idx1, :])
    # persist this dataset's timing row, then update and save the plot
    np.save('test_parallel/' + estimator.__name__ + '.' + ds['name'],
            gmtmat[idx1, :])

    p = ax.plot(chunklist, gmtmat[idx1, :], '.-', label=ds['name'])
    ax.legend(loc='upper center')
    plt.savefig('test_parallel/' + estimator.__name__ + str(idx1) + '.eps',
                format='eps', dpi=300)
# plt.show()

+ 2293
- 0
notebooks/test_parallel/0.eps
File diff suppressed because it is too large
View File


+ 2451
- 0
notebooks/test_parallel/1.eps
File diff suppressed because it is too large
View File


+ 2642
- 0
notebooks/test_parallel/2.eps
File diff suppressed because it is too large
View File


+ 2832
- 0
notebooks/test_parallel/3.eps
File diff suppressed because it is too large
View File


+ 3242
- 0
notebooks/test_parallel/4.eps
File diff suppressed because it is too large
View File


+ 3880
- 0
notebooks/test_parallel/5.eps
File diff suppressed because it is too large
View File


BIN
notebooks/test_parallel/Acyclic.npy View File


BIN
notebooks/test_parallel/Alkane.npy View File


+ 15
- 0
notebooks/test_parallel/ENZYMES.txt View File

@@ -0,0 +1,15 @@
0 100 12315.039321660995
1 200 12115.199783325195
2 300 10830.247281551361
3 500 10983.445399045944
4 700 10847.954899311066
5 900 10847.967393398285
6 1000 10858.62141251564
7 3000 11041.101693153381
8 5000 11311.387048959732
9 7000 11937.84876036644
10 9000 11969.92341041565
11 10000 12337.565557003021
12 30000 13055.397030115128
13 50000 14964.71178483963
14 70000 20204.410992860794

BIN
notebooks/test_parallel/Letter-med.npy View File


BIN
notebooks/test_parallel/MAO.npy View File


BIN
notebooks/test_parallel/MUTAG.npy View File


BIN
notebooks/test_parallel/PAH.npy View File


+ 14
- 0
notebooks/test_parallel/commonwalkkernel.Acyclic.txt View File

@@ -0,0 +1,14 @@
0 10 1045.7502884864807
1 30 1042.6204540729523
2 50 1058.7516617774963
3 70 10983.445399045944
4 90
5 100 1045.951178073883
6 300 1046.520814895629
7 500 1080.4295434951782
8 700 1062.4622604846954
9 900 1105.4361708164215
10 1000 1090.1234941482544
11 3000 1175.5646018981934
12 5000 993.7158119678497
13 7000

+ 14
- 0
notebooks/test_parallel/commonwalkkernel.Letter-med.txt View File

@@ -0,0 +1,14 @@
0 100 1044301 5:12
1 30
2 50 4889.178356409073
3 70 5086.932644605637
4 90 5096.774455308914
5 100 5189.769321680069
6 300 5199.769321680069
7 500
8 700 5206.7741804122925
9 900 5203.689619779587
10 1000
11 3000
12 5000


BIN
notebooks/test_parallel/commonwalkkernel.MAO.npy View File


+ 14
- 0
notebooks/test_parallel/commonwalkkernel.MUTAG.txt View File

@@ -0,0 +1,14 @@
0 10 5143.154480934143
1 30
2 50 4889.178356409073
3 70 5086.932644605637
4 90 5096.774455308914
5 100 5189.769321680069
6 300 5199.769321680069
7 500
8 700 5206.7741804122925
9 900 5203.689619779587
10 1000
11 3000
12 5000


BIN
notebooks/test_parallel/commonwalkkernel.PAH.npy View File


+ 2436
- 0
notebooks/test_parallel/commonwalkkernel0.eps
File diff suppressed because it is too large
View File


+ 2634
- 0
notebooks/test_parallel/commonwalkkernel1.eps
File diff suppressed because it is too large
View File


+ 2313
- 0
notebooks/test_parallel/myria/0.eps
File diff suppressed because it is too large
View File


+ 2471
- 0
notebooks/test_parallel/myria/1.eps
File diff suppressed because it is too large
View File


+ 2662
- 0
notebooks/test_parallel/myria/2.eps
File diff suppressed because it is too large
View File


+ 2852
- 0
notebooks/test_parallel/myria/3.eps
File diff suppressed because it is too large
View File


+ 3282
- 0
notebooks/test_parallel/myria/4.eps
File diff suppressed because it is too large
View File


+ 3920
- 0
notebooks/test_parallel/myria/5.eps
File diff suppressed because it is too large
View File


+ 2092
- 0
notebooks/test_parallel/myria/6.eps
File diff suppressed because it is too large
View File


BIN
notebooks/test_parallel/myria/Acyclic.npy View File


BIN
notebooks/test_parallel/myria/Alkane.npy View File


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save