
* use a global variable to handle big read-only variables when they are used in subprocesses (see the first sketch below this list).

* rewrite the implementation of the marginalized kernel.
* implement four computation methods for the generalized random walk kernel.
* in the path kernel up to length h, use a trie to store all paths, saving a tremendous amount of memory; use depth-first search to extract the paths from graphs (see the second sketch below this list).
* in the model_selection_for_precomputed_kernel method, complete the part that performs cross-validation when Gram matrices are read from file.
* in the get_dataset_attributes method, correct the three sub-methods that compute node degrees, and add sub-methods that compute the fill factors of graphs (see the sketch after the notebooks/get_dataset_attributes.py diff below).
* change the default chunksize of the pool.imap_unordered parallel method to 100.
* remove try...except blocks, as they may hide bugs.
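
A minimal sketch of the global-variable pattern from the first item, using only the standard library plus numpy. The names init_worker, G_gms and trial_do match those used in pygraph/utils/model_selection_precomputed.py; everything else is illustrative. The big read-only list is handed to each worker once through the Pool initializer instead of being pickled into every task, and the chunksize argument mentioned above batches tasks per worker:

import multiprocessing as mp
import numpy as np

def init_worker(gms_toshare):
    # bind the shared read-only data to a module-level global in each worker.
    global G_gms
    G_gms = gms_toshare

def trial_do(i):
    # workers read the global instead of receiving the matrices in each task.
    return i, float(np.sum(G_gms[0]))

if __name__ == '__main__':
    gram_matrices = [np.random.rand(100, 100) for _ in range(3)]
    pool = mp.Pool(processes=4, initializer=init_worker,
                   initargs=(gram_matrices,))
    for idx, s in pool.imap_unordered(trial_do, range(30), chunksize=100):
        print(idx, s)
    pool.close()
    pool.join()

And a self-contained sketch of the trie-plus-DFS idea from the path kernel item. The actual Trie class lives in pygraph/utils/trie.py and its API may differ; the nested-dict trie below is only illustrative. Paths with at most h edges are enumerated by depth-first search and stored in a trie keyed by node labels, so shared prefixes are stored only once:

import networkx as nx

def paths_up_to_h(G, h, node_label='atom'):
    """Collect the label sequences of all simple paths with at most h edges
    in a trie (nested dicts; the key '#' marks the end of a stored path)."""
    trie = {}

    def insert(labels):
        node = trie
        for lb in labels:
            node = node.setdefault(lb, {})
        node['#'] = True

    def dfs(path):
        insert([G.nodes[v][node_label] for v in path])
        if len(path) - 1 < h:  # the path still has fewer than h edges.
            for w in G.neighbors(path[-1]):
                if w not in path:  # keep paths simple (no repeated nodes).
                    dfs(path + [w])

    for v in G.nodes():
        dfs([v])
    return trie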
v0.1
jajupmochi committed 6 years ago
commit 1566139883
70 changed files with 116833 additions and 172150 deletions
  1. +3 -0  .gitignore
  2. +6 -4  notebooks/check_gm.py
  3. +0 -22074  notebooks/check_gm/Acyclic.gm.eps
  4. +0 -41291  notebooks/check_gm/Alkane.gm.eps
  5. +38845 -19680  notebooks/check_gm/Letter-med.gm.eps
  6. +0 -41381  notebooks/check_gm/MAO.gm.eps
  7. +39730 -39799  notebooks/check_gm/PAH.gm.eps
  8. +59 -42  notebooks/draw_running_time.py
  9. +143 -77  notebooks/get_dataset_attributes.ipynb
  10. +62 -0  notebooks/get_dataset_attributes.py
  11. +19 -0  notebooks/job_graphkernels.sl
  12. +12 -0  notebooks/job_test.sl
  13. BIN  notebooks/libs.pyc
  14. +815 -0  notebooks/memory_profile.ipynb
  15. +28114 -1487  notebooks/plot_all_graphs.ipynb
  16. +3 -4  notebooks/run_commonwalkkernel.py
  17. +76 -0  notebooks/run_degree_differs_cw.py
  18. +78 -0  notebooks/run_degree_differs_ma.py
  19. +102 -0  notebooks/run_degree_differs_rw.py
  20. +77 -0  notebooks/run_degree_differs_sp.py
  21. +79 -0  notebooks/run_degree_differs_ssp.py
  22. +74 -0  notebooks/run_degree_differs_uhp.py
  23. +15 -13  notebooks/run_marginalizedkernel.py
  24. +1 -1  notebooks/run_randomwalkkernel.ipynb
  25. +110 -0  notebooks/run_randomwalkkernel.py
  26. +70 -0  notebooks/run_rwalk_symonly.py
  27. +61 -0  notebooks/run_sp_symonly.py
  28. +2 -2  notebooks/run_spkernel.py
  29. +47 -0  notebooks/run_ssp_symonly.py
  30. +308 -0  notebooks/run_structuralspkernel.ipynb
  31. +3 -5  notebooks/run_untilhpathkernel.py
  32. +86 -0  notebooks/run_vertex_differs_cw.py
  33. +83 -0  notebooks/run_vertex_differs_ma.py
  34. +108 -0  notebooks/run_vertex_differs_rw.py
  35. +83 -0  notebooks/run_vertex_differs_sp.py
  36. +85 -0  notebooks/run_vertex_differs_ssp.py
  37. +80 -0  notebooks/run_vertex_differs_uhp.py
  38. +47 -0  notebooks/test_mpi.py
  39. +482 -484  notebooks/test_parallel.py
  40. +189 -410  notebooks/test_parallel/myria/0.eps
  41. +2752 -0  notebooks/test_parallel/myria/28cpus/output_parallel28.txt
  42. +0 -2092  notebooks/test_parallel/myria/6.eps
  43. BIN  notebooks/test_parallel/myria/structuralspkernel.Acyclic.npy
  44. BIN  notebooks/test_parallel/myria/structuralspkernel.Alkane.npy
  45. BIN  notebooks/test_parallel/myria/structuralspkernel.MAO.npy
  46. BIN  notebooks/test_parallel/myria/structuralspkernel.MUTAG.npy
  47. BIN  notebooks/test_parallel/myria/structuralspkernel.PAH.npy
  48. +48 -48  notebooks/test_parallel/myria/structuralspkernel0.eps
  49. +88 -88  notebooks/test_parallel/myria/structuralspkernel1.eps
  50. +156 -136  notebooks/test_parallel/myria/structuralspkernel2.eps
  51. +186 -186  notebooks/test_parallel/myria/structuralspkernel3.eps
  52. +226 -226  notebooks/test_parallel/myria/structuralspkernel4.eps
  53. +0 -2100  notebooks/test_parallel/myria/structuralspkernel5.eps
  54. +0 -1  pygraph/kernels/.##untildPathKernel.py#
  55. +72 -48  pygraph/kernels/commonWalkKernel.py
  56. +140 -82  pygraph/kernels/marginalizedKernel.py
  57. +641 -126  pygraph/kernels/randomWalkKernel.py
  58. +842 -0  pygraph/kernels/rwalk_sym.py
  59. +27 -32  pygraph/kernels/spKernel.py
  60. +200 -0  pygraph/kernels/sp_sym.py
  61. +464 -0  pygraph/kernels/ssp_sym.py
  62. +66 -48  pygraph/kernels/structuralspKernel.py
  63. +273 -121  pygraph/kernels/untilHPathKernel.py
  64. +28 -3  pygraph/utils/graphdataset.py
  65. +5 -9  pygraph/utils/kernels.py
  66. +204 -49  pygraph/utils/model_selection_precomputed.py
  67. +86 -0  pygraph/utils/openblassettings.py
  68. +60 -0  pygraph/utils/parallel.py
  69. +111 -0  pygraph/utils/trie.py
  70. +1 -1  pygraph/utils/utils.py

+3 -0  .gitignore

@@ -4,6 +4,9 @@ datasets/*
!datasets/ds.py
notebooks/results/*
requirements/*
*.npy
*.eps
*.dat

__pycache__
##*#

+6 -4  notebooks/check_gm.py

@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
from numpy.linalg import eig

# read gram matrices from file.
results_dir = 'results/untilhpathkernel/myria'
results_dir = 'results/marginalizedkernel'
ds_name = 'Letter-med'
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
#print('gm time: ', gmfile['gmtime'])
@@ -22,11 +22,13 @@ gram_matrices = gmfile['gms']
#y = gmfile['y'].tolist()
#x = gram_matrices[0]

for x in gram_matrices:
for idx, x in enumerate(gram_matrices):
print()
print(idx)
plt.imshow(x)
plt.colorbar()
plt.savefig('check_gm/' + ds_name + '.gm.eps', format='eps', dpi=300)
# print(np.transpose(x))
# print(np.transpose(x))
print('if symmetric: ', np.array_equal(x, np.transpose(x)))
print('diag: ', np.diag(x))
@@ -35,7 +37,7 @@ for x in gram_matrices:
print('min, max matrix: ', np.min(x), np.max(x))
for i in range(len(x)):
for j in range(len(x)):
if x[i][j] > 1:
if x[i][j] > 1 + 1e-9:
print(i, j)
raise Exception('value bigger than 1 with index', i, j)
print('mean x: ', np.mean(np.mean(x)))


+0 -22074  notebooks/check_gm/Acyclic.gm.eps
File diff suppressed because it is too large


+0 -41291  notebooks/check_gm/Alkane.gm.eps
File diff suppressed because it is too large


+38845 -19680  notebooks/check_gm/Letter-med.gm.eps
File diff suppressed because it is too large


+0 -41381  notebooks/check_gm/MAO.gm.eps
File diff suppressed because it is too large


+39730 -39799  notebooks/check_gm/PAH.gm.eps
File diff suppressed because it is too large


+59 -42  notebooks/draw_running_time.py

@@ -10,60 +10,77 @@ Created on Mon Sep 24 17:37:26 2018
import numpy as np
import matplotlib.pyplot as plt

N = 6
tgm1 = [3.68,
2.24,
3.34,
# 0,
20.00,
2020.46,
3198.84]
tgm2 = [4.29,
3.35,
5.78,
# 11.21,
40.58,
3136.26,
17222.21]
tms1 = [51.19,
73.09,
5.01,
# 0,
22.87,
2211.97,
3211.58]
tms2 = [65.16,
53.02,
10.32,
# 1162.41,
49.86,
3931.68,
17270.55]
N = 7
tgm1 = np.array([0.73,
0.88,
1.65,
1.97,
4.89,
36.98,
704.54])
tgm2 = np.array([0.77,
1.22,
2.95,
5.70,
20.29,
147.09,
3477.65])
tms1 = np.array([2.68,
3.41,
3.36,
237.00,
7.58,
255.48,
717.35])
tms2 = np.array([3.93,
4.96,
5.84,
833.06,
26.62,
807.84,
3515.72])

fig, ax = plt.subplots()
fig, ax = plt.subplots(1, 1, figsize=(10.5, 4.2))

ind = np.arange(N) # the x locations for the groups
width = 0.30 # the width of the bars: can also be len(x) sequence
width = 0.23 # the width of the bars: can also be len(x) sequence

p1 = ax.bar(ind, tgm1, width, label='$t_{gm}$ CRIANN')
p2 = ax.bar(ind, tms1, width, bottom=tgm1, label='$t_{ms}$ CRIANN')
p3 = ax.bar(ind + width, tgm2, width, label='$t_{gm}$ laptop')
p4 = ax.bar(ind + width, tms2, width, bottom=tgm2, label='$t_{ms}$ laptop')
p1 = ax.bar(ind - width * 0.03, tgm1, width, label='compute Gram matrix on $CRIANN$ ($t_1$)', zorder=3)
p2 = ax.bar(ind - width * 0.03, tms1 - tgm1, width, bottom=tgm1, label='model selection on $CRIANN$', zorder=3)
p3 = ax.bar(ind + width * 1.03, tgm2, width, label='compute Gram matrix on $laptop$ ($t_2$)', zorder=3)
p4 = ax.bar(ind + width * 1.03, tms2 - tgm2, width, bottom=tgm2, label='model selection on $laptop$', zorder=3)

ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('datasets')
ax.set_ylabel('runtime($s$)')
ax.set_title('Runtime of the shortest path kernel on all datasets')
plt.xticks(ind + width / 2, ('Acyclic', 'Alkane', 'MAO', 'MUTAG', 'Letter-med', 'ENZYMES'))
#ax.set_title('Runtime of the shortest path kernel on all datasets')
plt.xticks(ind + width / 2, ('Alkane', 'Acyclic', 'MAO', 'PAH', 'MUTAG',
'Letter-med', 'ENZYMES'))
#ax.set_yticks(np.logspace(-16, -3, num=20, base=10))
#ax.set_ylim(bottom=1e-15)
ax.legend(loc='upper left')
ax.grid(axis='y', zorder=0)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.xaxis.set_ticks_position('none')

ax2 = ax.twinx()
p1 = ax2.plot(ind + width / 2, np.array(tgm2) / np.array(tgm1), 'ro-',
label='$t_{gm}$ laptop / $t_{gm}$ CRIANN')
p5 = ax2.plot(ind + width / 2, tgm2 / tgm1, 'bo-',
label='$t_2 / $ $t_1$')
ax2.set_ylabel('ratios')
ax2.legend(loc='upper center')
ax2.spines['top'].set_visible(False)
ax2.spines['bottom'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.xaxis.set_ticks_position('none')
ax2.yaxis.set_ticks_position('none')
ax.yaxis.set_ticks_position('none')

plt.savefig('check_gm/compare_running_time.eps', format='eps', dpi=300)
fig.subplots_adjust(right=0.63)
fig.legend(loc='right', ncol=1, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)

plt.savefig('check_gm/parallel_runtime_on_different_machines.eps', format='eps', dpi=300,
transparent=True, bbox_inches='tight')
plt.show()

+143 -77  notebooks/get_dataset_attributes.ipynb

@@ -24,9 +24,12 @@
"ave_edge_num : 7.1530054644808745\n",
"min_edge_num : 2\n",
"max_edge_num : 10\n",
"ave_node_degree : 2.80327868852459\n",
"min_node_degree : 2\n",
"max_node_degree : 4\n",
"ave_node_degree : 1.737561012151176\n",
"min_node_degree : 1.3333333333333333\n",
"max_node_degree : 1.8181818181818181\n",
"ave_fill_factor : 0.11241161841596808\n",
"min_fill_factor : 0.08264462809917356\n",
"max_fill_factor : 0.2222222222222222\n",
"node_label_num : 3\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
@@ -46,9 +49,12 @@
"ave_edge_num : 7.873333333333333\n",
"min_edge_num : 0\n",
"max_edge_num : 9\n",
"ave_node_degree : 3.36\n",
"min_node_degree : 0\n",
"max_node_degree : 4\n",
"ave_node_degree : 1.7507830687830694\n",
"min_node_degree : 0.0\n",
"max_node_degree : 1.8\n",
"ave_fill_factor : 0.10199498404299989\n",
"min_fill_factor : 0.0\n",
"max_fill_factor : 0.25\n",
"node_label_num : 2\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
@@ -68,9 +74,12 @@
"ave_edge_num : 19.63235294117647\n",
"min_edge_num : 12\n",
"max_edge_num : 29\n",
"ave_node_degree : 3.0\n",
"min_node_degree : 3\n",
"max_node_degree : 3\n",
"ave_node_degree : 2.1347114940751464\n",
"min_node_degree : 2.090909090909091\n",
"max_node_degree : 2.2\n",
"ave_fill_factor : 0.060638921710159575\n",
"min_fill_factor : 0.039780521262002745\n",
"max_fill_factor : 0.09917355371900827\n",
"node_label_num : 3\n",
"edge_label_num : 4\n",
"node_attr_dim : 0\n",
@@ -90,9 +99,12 @@
"ave_edge_num : 24.425531914893618\n",
"min_edge_num : 11\n",
"max_edge_num : 34\n",
"ave_node_degree : 3.0106382978723403\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"ave_node_degree : 2.3550919704450077\n",
"min_node_degree : 2.2\n",
"max_node_degree : 2.5\n",
"ave_fill_factor : 0.05799294134806485\n",
"min_fill_factor : 0.04336734693877551\n",
"max_fill_factor : 0.11\n",
"node_label_num : 1\n",
"edge_label_num : 1\n",
"node_attr_dim : 0\n",
@@ -112,9 +124,12 @@
"ave_edge_num : 19.79255319148936\n",
"min_edge_num : 10\n",
"max_edge_num : 33\n",
"ave_node_degree : 3.00531914893617\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"ave_node_degree : 2.1887720785524962\n",
"min_node_degree : 2.0\n",
"max_node_degree : 2.4444444444444446\n",
"ave_fill_factor : 0.06480462822996713\n",
"min_fill_factor : 0.039540816326530615\n",
"max_fill_factor : 0.1\n",
"node_label_num : 7\n",
"edge_label_num : 11\n",
"node_attr_dim : 0\n",
@@ -134,9 +149,12 @@
"ave_edge_num : 3.2057777777777776\n",
"min_edge_num : 0\n",
"max_edge_num : 7\n",
"ave_node_degree : 2.012888888888889\n",
"min_node_degree : 0\n",
"max_node_degree : 4\n",
"ave_node_degree : 1.35270582010582\n",
"min_node_degree : 0.0\n",
"max_node_degree : 2.4\n",
"ave_fill_factor : 0.15517701625094482\n",
"min_fill_factor : 0.0\n",
"max_fill_factor : 0.3333333333333333\n",
"node_label_num : 0\n",
"edge_label_num : 0\n",
"node_attr_dim : 2\n",
@@ -156,9 +174,12 @@
"ave_edge_num : 62.13666666666666\n",
"min_edge_num : 1\n",
"max_edge_num : 149\n",
"ave_node_degree : 6.086666666666667\n",
"min_node_degree : 1\n",
"max_node_degree : 9\n",
"ave_node_degree : 3.862625314410413\n",
"min_node_degree : 0.32\n",
"max_node_degree : 5.230769230769231\n",
"ave_fill_factor : 0.07509817146721588\n",
"min_fill_factor : 0.0016\n",
"max_fill_factor : 0.375\n",
"node_label_num : 3\n",
"edge_label_num : 0\n",
"node_attr_dim : 18\n",
@@ -178,9 +199,12 @@
"ave_edge_num : 30.76942587041734\n",
"min_edge_num : 3\n",
"max_edge_num : 112\n",
"ave_node_degree : 3.75651371916071\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"ave_node_degree : 2.0379886162441148\n",
"min_node_degree : 0.47961630695443647\n",
"max_node_degree : 2.3703703703703702\n",
"ave_fill_factor : 0.0431047931997047\n",
"min_fill_factor : 0.0005750795047415305\n",
"max_fill_factor : 0.1875\n",
"node_label_num : 14\n",
"edge_label_num : 3\n",
"node_attr_dim : 0\n",
@@ -200,9 +224,12 @@
"ave_edge_num : 715.6587436332767\n",
"min_edge_num : 63\n",
"max_edge_num : 14267\n",
"ave_node_degree : 9.509337860780985\n",
"min_node_degree : 6\n",
"max_node_degree : 19\n",
"ave_node_degree : 4.979061662020889\n",
"min_node_degree : 3.6116504854368934\n",
"max_node_degree : 8.933333333333334\n",
"ave_fill_factor : 0.013790239865199101\n",
"min_fill_factor : 0.0004318164098347239\n",
"max_fill_factor : 0.09666666666666666\n",
"node_label_num : 82\n",
"edge_label_num : 0\n",
"node_attr_dim : 0\n",
@@ -222,9 +249,12 @@
"ave_edge_num : 16.195\n",
"min_edge_num : 1\n",
"max_edge_num : 103\n",
"ave_node_degree : 3.322\n",
"min_node_degree : 1\n",
"max_node_degree : 6\n",
"ave_node_degree : 2.012865369646626\n",
"min_node_degree : 0.6\n",
"max_node_degree : 2.8333333333333335\n",
"ave_fill_factor : 0.08679744688995196\n",
"min_fill_factor : 0.011412742382271468\n",
"max_fill_factor : 0.25\n",
"node_label_num : 38\n",
"edge_label_num : 3\n",
"node_attr_dim : 4\n",
@@ -244,9 +274,12 @@
"ave_edge_num : 3074.0975609756097\n",
"min_edge_num : 320\n",
"max_edge_num : 10888\n",
"ave_node_degree : 7.853658536585366\n",
"min_node_degree : 6\n",
"max_node_degree : 10\n",
"ave_node_degree : 4.503061007447199\n",
"min_node_degree : 4.191919191919192\n",
"max_node_degree : 4.776119402985074\n",
"ave_fill_factor : 0.003689884678097613\n",
"min_fill_factor : 0.00042914515176536197\n",
"max_fill_factor : 0.017821341055914458\n",
"node_label_num : 5\n",
"edge_label_num : 0\n",
"node_attr_dim : 1\n",
@@ -266,9 +299,12 @@
"ave_edge_num : 97.9366515837104\n",
"min_edge_num : 53\n",
"max_edge_num : 145\n",
"ave_node_degree : 10.158371040723981\n",
"min_node_degree : 8\n",
"max_node_degree : 16\n",
"ave_node_degree : 4.8153400199203436\n",
"min_node_degree : 4.176470588235294\n",
"max_node_degree : 5.576923076923077\n",
"ave_fill_factor : 0.06021937645679636\n",
"min_fill_factor : 0.04521181915272339\n",
"max_fill_factor : 0.0848\n",
"node_label_num : 10\n",
"edge_label_num : 0\n",
"node_attr_dim : 0\n",
@@ -288,9 +324,12 @@
"ave_edge_num : 198.32326820603907\n",
"min_edge_num : 121\n",
"max_edge_num : 405\n",
"ave_node_degree : 11.41563055062167\n",
"min_node_degree : 8\n",
"max_node_degree : 23\n",
"ave_node_degree : 5.102391320310953\n",
"min_node_degree : 4.04\n",
"max_node_degree : 6.6\n",
"ave_fill_factor : 0.03357132864022473\n",
"min_fill_factor : 0.01873405612244898\n",
"max_fill_factor : 0.04652056901191849\n",
"node_label_num : 22\n",
"edge_label_num : 0\n",
"node_attr_dim : 0\n",
@@ -310,9 +349,12 @@
"ave_edge_num : 196.0\n",
"min_edge_num : 196\n",
"max_edge_num : 196\n",
"ave_node_degree : 8.0\n",
"min_node_degree : 8\n",
"max_node_degree : 8\n",
"ave_node_degree : 3.9200000000000017\n",
"min_node_degree : 3.92\n",
"max_node_degree : 3.92\n",
"ave_fill_factor : 0.019600000000000003\n",
"min_fill_factor : 0.0196\n",
"max_fill_factor : 0.0196\n",
"node_label_num : 8\n",
"edge_label_num : 0\n",
"node_attr_dim : 1\n",
@@ -332,15 +374,24 @@
"ave_edge_num : 38.358024691358025\n",
"min_edge_num : 13\n",
"max_edge_num : 60\n",
"ave_node_degree : 3.8641975308641974\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"ave_node_degree : 2.1466610247664697\n",
"min_node_degree : 2.0\n",
"max_node_degree : 2.2777777777777777\n",
"ave_fill_factor : 0.0314385616191916\n",
"min_fill_factor : 0.017851646660510926\n",
"max_fill_factor : 0.07692307692307693\n",
"node_label_num : 10\n",
"edge_label_num : 0\n",
"node_attr_dim : 3\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"COX2:\n",
"substructures : {'non linear', 'linear'}\n",
@@ -354,9 +405,12 @@
"ave_edge_num : 43.44539614561028\n",
"min_edge_num : 34\n",
"max_edge_num : 59\n",
"ave_node_degree : 4.0\n",
"min_node_degree : 4\n",
"max_node_degree : 4\n",
"ave_node_degree : 2.1077350079995685\n",
"min_node_degree : 2.076923076923077\n",
"max_node_degree : 2.1739130434782608\n",
"ave_fill_factor : 0.025799177869507202\n",
"min_fill_factor : 0.01881377551020408\n",
"max_fill_factor : 0.033203125\n",
"node_label_num : 8\n",
"edge_label_num : 0\n",
"node_attr_dim : 3\n",
@@ -376,9 +430,12 @@
"ave_edge_num : 44.544973544973544\n",
"min_edge_num : 21\n",
"max_edge_num : 73\n",
"ave_node_degree : 3.955026455026455\n",
"min_node_degree : 3\n",
"max_node_degree : 4\n",
"ave_node_degree : 2.102359895640024\n",
"min_node_degree : 2.0338983050847457\n",
"max_node_degree : 2.2\n",
"ave_fill_factor : 0.026126638866896944\n",
"min_fill_factor : 0.0144812537195001\n",
"max_fill_factor : 0.0525\n",
"node_label_num : 9\n",
"edge_label_num : 0\n",
"node_attr_dim : 3\n",
@@ -398,9 +455,12 @@
"ave_edge_num : 72.8158131176999\n",
"min_edge_num : 5\n",
"max_edge_num : 1049\n",
"ave_node_degree : 5.794249775381851\n",
"min_node_degree : 3\n",
"max_node_degree : 25\n",
"ave_node_degree : 3.734642171150555\n",
"min_node_degree : 1.7142857142857142\n",
"max_node_degree : 5.071428571428571\n",
"ave_fill_factor : 0.09599853508460923\n",
"min_fill_factor : 0.0027289281997918834\n",
"max_fill_factor : 0.375\n",
"node_label_num : 3\n",
"edge_label_num : 0\n",
"node_attr_dim : 1\n",
@@ -420,21 +480,18 @@
"ave_edge_num : 72.8158131176999\n",
"min_edge_num : 5\n",
"max_edge_num : 1049\n",
"ave_node_degree : 5.794249775381851\n",
"min_node_degree : 3\n",
"max_node_degree : 25\n",
"ave_node_degree : 3.734642171150555\n",
"min_node_degree : 1.7142857142857142\n",
"max_node_degree : 5.071428571428571\n",
"ave_fill_factor : 0.09599853508460923\n",
"min_fill_factor : 0.0027289281997918834\n",
"max_fill_factor : 0.375\n",
"node_label_num : 3\n",
"edge_label_num : 0\n",
"node_attr_dim : 29\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"NCI1:\n",
"substructures : {'non linear', 'linear'}\n",
@@ -448,9 +505,12 @@
"ave_edge_num : 32.3\n",
"min_edge_num : 2\n",
"max_edge_num : 119\n",
"ave_node_degree : 3.3360097323600972\n",
"min_node_degree : 2\n",
"max_node_degree : 4\n",
"ave_node_degree : 2.155013792267071\n",
"min_node_degree : 0.8\n",
"max_node_degree : 2.769230769230769\n",
"ave_fill_factor : 0.04239828192835043\n",
"min_fill_factor : 0.009522961908152367\n",
"max_fill_factor : 0.2222222222222222\n",
"node_label_num : 37\n",
"edge_label_num : 0\n",
"node_attr_dim : 0\n",
@@ -470,17 +530,20 @@
"ave_edge_num : 32.13084565059365\n",
"min_edge_num : 3\n",
"max_edge_num : 119\n",
"ave_node_degree : 3.343833292948873\n",
"min_node_degree : 2\n",
"max_node_degree : 5\n",
"ave_node_degree : 2.156446168619097\n",
"min_node_degree : 1.0909090909090908\n",
"max_node_degree : 2.769230769230769\n",
"ave_fill_factor : 0.04263668408405519\n",
"min_fill_factor : 0.009522961908152367\n",
"max_fill_factor : 0.1875\n",
"node_label_num : 38\n",
"edge_label_num : 0\n",
"node_attr_dim : 0\n",
"edge_attr_dim : 0\n",
"class_number : 2\n",
"\n",
"load SDF: 100%|██████████| 4457424/4457424 [00:10<00:00, 430440.94it/s]\n",
"ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4352.25it/s] \n",
"load SDF: 100%|██████████| 4457424/4457424 [00:08<00:00, 522501.84it/s]\n",
"ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4625.31it/s] \n",
"\n",
"NCI-HIV:\n",
"substructures : {'non linear', 'linear'}\n",
@@ -494,9 +557,12 @@
"ave_edge_num : 47.7137903565906\n",
"min_edge_num : 1\n",
"max_edge_num : 441\n",
"ave_node_degree : 3.9760554800618526\n",
"min_node_degree : 1\n",
"max_node_degree : 12\n",
"ave_node_degree : 2.087755727203458\n",
"min_node_degree : 1.0\n",
"max_node_degree : 4.571428571428571\n",
"ave_fill_factor : 0.02739985514266206\n",
"min_fill_factor : 0.002298742728466879\n",
"max_fill_factor : 0.25\n",
"node_label_num : 63\n",
"edge_label_num : 3\n",
"node_attr_dim : 0\n",
@@ -580,7 +646,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.7"
}
},
"nbformat": 4,


+62 -0  notebooks/get_dataset_attributes.py

@@ -0,0 +1,62 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 17 16:07:38 2018

@author: ljia
"""

import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.graphdataset import get_dataset_attributes

dslist = [
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',},
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',},
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',},
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',},
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
{'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
{'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},
{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},
{'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},
{'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},
{'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},
{'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},
{'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},
{'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},
{'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},
{'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},
{'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},
{'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},
{'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},
{'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},

# # not working below
# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]

for ds in dslist:
dataset, y = loadDataset(
ds['dataset'],
filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
attrs = get_dataset_attributes(
dataset, target=y, node_label='atom', edge_label='bond_type')
print()
print(ds['name'] + ':')
for atr in attrs:
print(atr, ':', attrs[atr])
print()
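
For reference, a minimal sketch consistent with the corrected numbers in the notebook output above (assuming networkx graphs; the function name is illustrative). The per-graph average node degree appears to be 2|E|/|V| and the fill factor |E|/|V|^2; the dataset-level statistics are then the mean, min and max over all graphs:

import networkx as nx

def degree_and_fill_stats(Gn):
    # average degree of an undirected graph: 2|E| / |V|.
    ave_degrees = [2 * nx.number_of_edges(G) / nx.number_of_nodes(G)
                   for G in Gn]
    # fill factor: edges present over the |V|^2 entries of the adjacency matrix.
    fill_factors = [nx.number_of_edges(G) / nx.number_of_nodes(G) ** 2
                    for G in Gn]
    return {'ave_node_degree': sum(ave_degrees) / len(ave_degrees),
            'min_node_degree': min(ave_degrees),
            'max_node_degree': max(ave_degrees),
            'ave_fill_factor': sum(fill_factors) / len(fill_factors),
            'min_fill_factor': min(fill_factors),
            'max_fill_factor': max(fill_factors)}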

+19 -0  notebooks/job_graphkernels.sl

@@ -0,0 +1,19 @@
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="graphkernels"
#SBATCH --partition=tcourt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output=output_graphkernels.txt
#SBATCH --error=error_graphkernels.txt
#
#SBATCH --ntasks=1
#SBATCH --nodes=2
#SBATCH --cpus-per-task=56
#SBATCH --time=24:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2017018/ljia01/py-graph/notebooks
srun python3 run_spkernel.py

+12 -0  notebooks/job_test.sl

@@ -0,0 +1,12 @@
#!/bin/bash
#
#SBATCH --job-name=test
#SBATCH --output=res.txt
#SBATCH --partition=long
#
#SBATCH --ntasks=1
#SBATCH --time=10:00
#SBATCH --mem-per-cpu=100

srun hostname
srun sleep 60

BIN  notebooks/libs.pyc


+815 -0  notebooks/memory_profile.ipynb

@@ -0,0 +1,815 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Acyclic\n",
"\n",
"--- This is a regression problem ---\n",
"\n",
"\n",
"1. Loading dataset from file...\n",
"\n",
"2. Calculating gram matrices. This could take a while...\n",
"\n",
" None edge weight specified. Set all weight to 1.\n",
"\n",
"getting sp graphs: 183it [00:00, 2198.32it/s]\n",
"calculating kernels: 16836it [00:17, 983.99it/s] \n",
"\n",
" --- shortest path kernel matrix of size 183 built in 17.32457208633423 seconds ---\n",
"\n",
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f63ab934158>, 'nsymb': <function gaussiankernel at 0x7f63ab9987b8>, 'mix': functools.partial(<function kernelproduct at 0x7f63ab951158>, <function deltakernel at 0x7f63ab934158>, <function gaussiankernel at 0x7f63ab9987b8>)}, 'n_jobs': 8} is: \n",
"\n",
"1 gram matrices are calculated, 0 of which are ignored.\n",
"\n",
"3. Fitting and predicting using nested cross validation. This could really take a while...\n",
"cross validation: 30it [00:12, 2.48it/s]\n",
"\n",
"4. Getting final performance...\n",
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f63ab934158>, 'nsymb': <function gaussiankernel at 0x7f63ab9987b8>, 'mix': functools.partial(<function kernelproduct at 0x7f63ab951158>, <function deltakernel at 0x7f63ab934158>, <function gaussiankernel at 0x7f63ab9987b8>)}, 'n_jobs': 8}]\n",
"best_params_in: [{'alpha': 3.1622776601683795e-10}]\n",
"\n",
"best_val_perf: 9.64631220504699\n",
"best_val_std: 0.6555235266552757\n",
"final_performance: [9.306976995404987]\n",
"final_confidence: [2.317244919360123]\n",
"train_performance: [6.190191405968441]\n",
"train_std: [0.21512408952827894]\n",
"\n",
"time to calculate gram matrix with different hyper-params: 17.32±nans\n",
"time to calculate best gram matrix: 17.32±nans\n",
"total training time with all hyper-param choices: 33.16s\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n",
" keepdims=keepdims)\n",
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n",
" ret = ret.dtype.type(ret / rcount)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Filename: ../pygraph/utils/model_selection_precomputed.py\n",
"\n",
"Line # Mem usage Increment Line Contents\n",
"================================================\n",
" 24 115.1 MiB 115.1 MiB @profile\n",
" 25 def model_selection_for_precomputed_kernel(datafile,\n",
" 26 estimator,\n",
" 27 param_grid_precomputed,\n",
" 28 param_grid,\n",
" 29 model_type,\n",
" 30 NUM_TRIALS=30,\n",
" 31 datafile_y=None,\n",
" 32 extra_params=None,\n",
" 33 ds_name='ds-unknown',\n",
" 34 n_jobs=1,\n",
" 35 read_gm_from_file=False):\n",
" 36 \"\"\"Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results.\n",
" 37 \n",
" 38 Parameters\n",
" 39 ----------\n",
" 40 datafile : string\n",
" 41 Path of dataset file.\n",
" 42 estimator : function\n",
" 43 kernel function used to estimate. This function needs to return a gram matrix.\n",
" 44 param_grid_precomputed : dictionary\n",
" 45 Dictionary with names (string) of parameters used to calculate gram matrices as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n",
" 46 param_grid : dictionary\n",
" 47 Dictionary with names (string) of parameters used as penelties as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n",
" 48 model_type : string\n",
" 49 Typr of the problem, can be regression or classification.\n",
" 50 NUM_TRIALS : integer\n",
" 51 Number of random trials of outer cv loop. The default is 30.\n",
" 52 datafile_y : string\n",
" 53 Path of file storing y data. This parameter is optional depending on the given dataset file.\n",
" 54 read_gm_from_file : boolean\n",
" 55 Whether gram matrices are loaded from file.\n",
" 56 \n",
" 57 Examples\n",
" 58 --------\n",
" 59 >>> import numpy as np\n",
" 60 >>> import sys\n",
" 61 >>> sys.path.insert(0, \"../\")\n",
" 62 >>> from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel\n",
" 63 >>> from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n",
" 64 >>>\n",
" 65 >>> datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
" 66 >>> estimator = weisfeilerlehmankernel\n",
" 67 >>> param_grid_precomputed = {'height': [0,1,2,3,4,5,6,7,8,9,10], 'base_kernel': ['subtree']}\n",
" 68 >>> param_grid = {\"alpha\": np.logspace(-2, 2, num = 10, base = 10)}\n",
" 69 >>>\n",
" 70 >>> model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, 'regression')\n",
" 71 \"\"\"\n",
" 72 115.1 MiB 0.0 MiB tqdm.monitor_interval = 0\n",
" 73 \n",
" 74 115.1 MiB 0.0 MiB results_dir = '../notebooks/results/' + estimator.__name__\n",
" 75 115.1 MiB 0.0 MiB if not os.path.exists(results_dir):\n",
" 76 os.makedirs(results_dir)\n",
" 77 # a string to save all the results.\n",
" 78 115.1 MiB 0.0 MiB str_fw = '###################### log time: ' + datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\") + '. ######################\\n\\n'\n",
" 79 115.1 MiB 0.0 MiB str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\\n# including gram matrices, serial numbers for gram matrix figures and performance.\\n\\n'\n",
" 80 \n",
" 81 # setup the model type\n",
" 82 115.1 MiB 0.0 MiB model_type = model_type.lower()\n",
" 83 115.1 MiB 0.0 MiB if model_type != 'regression' and model_type != 'classification':\n",
" 84 raise Exception(\n",
" 85 'The model type is incorrect! Please choose from regression or classification.'\n",
" 86 )\n",
" 87 115.1 MiB 0.0 MiB print()\n",
" 88 115.1 MiB 0.0 MiB print('--- This is a %s problem ---' % model_type)\n",
" 89 115.1 MiB 0.0 MiB str_fw += 'This is a %s problem.\\n' % model_type\n",
" 90 \n",
" 91 # calculate gram matrices rather than read them from file.\n",
" 92 115.1 MiB 0.0 MiB if read_gm_from_file == False:\n",
" 93 # Load the dataset\n",
" 94 115.1 MiB 0.0 MiB print()\n",
" 95 115.1 MiB 0.0 MiB print('\\n1. Loading dataset from file...')\n",
" 96 115.1 MiB 0.0 MiB if isinstance(datafile, str):\n",
" 97 115.1 MiB 0.0 MiB dataset, y_all = loadDataset(\n",
" 98 116.3 MiB 1.2 MiB datafile, filename_y=datafile_y, extra_params=extra_params)\n",
" 99 else: # load data directly from variable.\n",
" 100 dataset = datafile\n",
" 101 y_all = datafile_y \n",
" 102 \n",
" 103 # import matplotlib.pyplot as plt\n",
" 104 # import networkx as nx\n",
" 105 # nx.draw_networkx(dataset[30])\n",
" 106 # plt.show()\n",
" 107 \n",
" 108 # Grid of parameters with a discrete number of values for each.\n",
" 109 116.3 MiB 0.0 MiB param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n",
" 110 116.3 MiB 0.0 MiB param_list = list(ParameterGrid(param_grid))\n",
" 111 \n",
" 112 116.3 MiB 0.0 MiB gram_matrices = [\n",
" 113 ] # a list to store gram matrices for all param_grid_precomputed\n",
" 114 116.3 MiB 0.0 MiB gram_matrix_time = [\n",
" 115 ] # a list to store time to calculate gram matrices\n",
" 116 116.3 MiB 0.0 MiB param_list_pre_revised = [\n",
" 117 ] # list to store param grids precomputed ignoring the useless ones\n",
" 118 \n",
" 119 # calculate all gram matrices\n",
" 120 116.3 MiB 0.0 MiB print()\n",
" 121 116.3 MiB 0.0 MiB print('2. Calculating gram matrices. This could take a while...')\n",
" 122 116.3 MiB 0.0 MiB str_fw += '\\nII. Gram matrices.\\n\\n'\n",
" 123 116.3 MiB 0.0 MiB tts = time.time() # start training time\n",
" 124 116.3 MiB 0.0 MiB nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN)\n",
" 125 144.8 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed):\n",
" 126 116.3 MiB 0.0 MiB y = y_all[:]\n",
" 127 116.3 MiB 0.0 MiB params_out['n_jobs'] = n_jobs\n",
" 128 # print(dataset)\n",
" 129 # import networkx as nx\n",
" 130 # nx.draw_networkx(dataset[1])\n",
" 131 # plt.show()\n",
" 132 119.1 MiB 2.8 MiB rtn_data = estimator(dataset[:], **params_out)\n",
" 133 119.1 MiB 0.0 MiB Kmatrix = rtn_data[0]\n",
" 134 119.1 MiB 0.0 MiB current_run_time = rtn_data[1]\n",
" 135 # for some kernels, some graphs in datasets may not meet the \n",
" 136 # kernels' requirements for graph structure. These graphs are trimmed. \n",
" 137 119.1 MiB 0.0 MiB if len(rtn_data) == 3:\n",
" 138 119.1 MiB 0.0 MiB idx_trim = rtn_data[2] # the index of trimmed graph list\n",
" 139 119.1 MiB 0.0 MiB y = [y[idxt] for idxt in idx_trim] # trim y accordingly\n",
" 140 # Kmatrix = np.random.rand(2250, 2250)\n",
" 141 # current_run_time = 0.1\n",
" 142 \n",
" 143 119.1 MiB 0.0 MiB Kmatrix_diag = Kmatrix.diagonal().copy()\n",
" 144 # remove graphs whose kernels with themselves are zeros\n",
" 145 119.1 MiB 0.0 MiB nb_g_ignore = 0\n",
" 146 119.1 MiB 0.0 MiB for idxk, diag in enumerate(Kmatrix_diag):\n",
" 147 119.1 MiB 0.0 MiB if diag == 0:\n",
" 148 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=0)\n",
" 149 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=1)\n",
" 150 nb_g_ignore += 1\n",
" 151 # normalization\n",
" 152 119.1 MiB 0.0 MiB for i in range(len(Kmatrix)):\n",
" 153 119.1 MiB 0.0 MiB for j in range(i, len(Kmatrix)):\n",
" 154 119.1 MiB 0.0 MiB Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])\n",
" 155 119.1 MiB 0.0 MiB Kmatrix[j][i] = Kmatrix[i][j]\n",
" 156 \n",
" 157 119.1 MiB 0.0 MiB print()\n",
" 158 119.1 MiB 0.0 MiB if params_out == {}:\n",
" 159 print('the gram matrix is: ')\n",
" 160 str_fw += 'the gram matrix is:\\n\\n'\n",
" 161 else:\n",
" 162 119.1 MiB 0.0 MiB print('the gram matrix with parameters', params_out, 'is: ')\n",
" 163 119.1 MiB 0.0 MiB str_fw += 'the gram matrix with parameters %s is:\\n\\n' % params_out\n",
" 164 119.1 MiB 0.0 MiB if len(Kmatrix) < 2:\n",
" 165 nb_gm_ignore += 1\n",
" 166 print('ignored, as at most only one of all its diagonal value is non-zero.')\n",
" 167 str_fw += 'ignored, as at most only one of all its diagonal value is non-zero.\\n\\n'\n",
" 168 else: \n",
" 169 119.1 MiB 0.0 MiB if np.isnan(Kmatrix).any(\n",
" 170 ): # if the matrix contains elements that are not numbers\n",
" 171 nb_gm_ignore += 1\n",
" 172 print('ignored, as it contains elements that are not numbers.')\n",
" 173 str_fw += 'ignored, as it contains elements that are not numbers.\\n\\n'\n",
" 174 else:\n",
" 175 # print(Kmatrix)\n",
" 176 119.1 MiB 0.0 MiB str_fw += np.array2string(\n",
" 177 119.1 MiB 0.0 MiB Kmatrix,\n",
" 178 119.1 MiB 0.0 MiB separator=',') + '\\n\\n'\n",
" 179 # separator=',',\n",
" 180 # threshold=np.inf,\n",
" 181 # floatmode='unique') + '\\n\\n'\n",
" 182 \n",
" 183 119.1 MiB 0.0 MiB fig_file_name = results_dir + '/GM[ds]' + ds_name\n",
" 184 119.1 MiB 0.0 MiB if params_out != {}:\n",
" 185 119.1 MiB 0.0 MiB fig_file_name += '[params]' + str(idx)\n",
" 186 119.8 MiB 0.7 MiB plt.imshow(Kmatrix)\n",
" 187 119.9 MiB 0.1 MiB plt.colorbar()\n",
" 188 144.8 MiB 24.9 MiB plt.savefig(fig_file_name + '.eps', format='eps', dpi=300)\n",
" 189 # plt.show()\n",
" 190 144.8 MiB 0.0 MiB plt.clf()\n",
" 191 144.8 MiB 0.0 MiB gram_matrices.append(Kmatrix)\n",
" 192 144.8 MiB 0.0 MiB gram_matrix_time.append(current_run_time)\n",
" 193 144.8 MiB 0.0 MiB param_list_pre_revised.append(params_out)\n",
" 194 144.8 MiB 0.0 MiB if nb_g_ignore > 0:\n",
" 195 print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore)\n",
" 196 str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore\n",
" 197 144.8 MiB 0.0 MiB print()\n",
" 198 144.8 MiB 0.0 MiB print(\n",
" 199 144.8 MiB 0.0 MiB '{} gram matrices are calculated, {} of which are ignored.'.format(\n",
" 200 144.8 MiB 0.0 MiB len(param_list_precomputed), nb_gm_ignore))\n",
" 201 144.8 MiB 0.0 MiB str_fw += '{} gram matrices are calculated, {} of which are ignored.\\n\\n'.format(len(param_list_precomputed), nb_gm_ignore)\n",
" 202 144.8 MiB 0.0 MiB str_fw += 'serial numbers of gram matrix figures and their corresponding parameters settings:\\n\\n'\n",
" 203 144.8 MiB 0.0 MiB str_fw += ''.join([\n",
" 204 144.8 MiB 0.0 MiB '{}: {}\\n'.format(idx, params_out)\n",
" 205 144.8 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed)\n",
" 206 ])\n",
" 207 \n",
" 208 144.8 MiB 0.0 MiB print()\n",
" 209 144.8 MiB 0.0 MiB if len(gram_matrices) == 0:\n",
" 210 print('all gram matrices are ignored, no results obtained.')\n",
" 211 str_fw += '\\nall gram matrices are ignored, no results obtained.\\n\\n'\n",
" 212 else:\n",
" 213 # save gram matrices to file.\n",
" 214 144.8 MiB 0.0 MiB np.savez(results_dir + '/' + ds_name + '.gm', \n",
" 215 144.8 MiB 0.0 MiB gms=gram_matrices, params=param_list_pre_revised, y=y, \n",
" 216 144.9 MiB 0.1 MiB gmtime=gram_matrix_time)\n",
" 217 \n",
" 218 144.9 MiB 0.0 MiB print(\n",
" 219 144.9 MiB 0.0 MiB '3. Fitting and predicting using nested cross validation. This could really take a while...'\n",
" 220 )\n",
" 221 \n",
" 222 # ---- use pool.imap_unordered to parallel and track progress. ----\n",
" 223 # train_pref = []\n",
" 224 # val_pref = []\n",
" 225 # test_pref = []\n",
" 226 # def func_assign(result, var_to_assign):\n",
" 227 # for idx, itm in enumerate(var_to_assign):\n",
" 228 # itm.append(result[idx]) \n",
" 229 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n",
" 230 # \n",
" 231 # parallel_me(trial_do_partial, range(NUM_TRIALS), func_assign, \n",
" 232 # [train_pref, val_pref, test_pref], glbv=gram_matrices,\n",
" 233 # method='imap_unordered', n_jobs=n_jobs, chunksize=1,\n",
" 234 # itr_desc='cross validation')\n",
" 235 \n",
" 236 144.9 MiB 0.0 MiB def init_worker(gms_toshare):\n",
" 237 global G_gms\n",
" 238 G_gms = gms_toshare\n",
" 239 \n",
" 240 # gram_matrices = np.array(gram_matrices)\n",
" 241 # gms_shape = gram_matrices.shape\n",
" 242 # gms_array = Array('d', np.reshape(gram_matrices.copy(), -1, order='C'))\n",
" 243 # pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gms_array, gms_shape))\n",
" 244 144.9 MiB 0.1 MiB pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n",
" 245 144.9 MiB 0.0 MiB trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n",
" 246 144.9 MiB 0.0 MiB train_pref = []\n",
" 247 144.9 MiB 0.0 MiB val_pref = []\n",
" 248 144.9 MiB 0.0 MiB test_pref = []\n",
" 249 # if NUM_TRIALS < 1000 * n_jobs:\n",
" 250 # chunksize = int(NUM_TRIALS / n_jobs) + 1\n",
" 251 # else:\n",
" 252 # chunksize = 1000\n",
" 253 144.9 MiB 0.0 MiB chunksize = 1\n",
" 254 145.1 MiB 0.1 MiB for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n",
" 255 145.1 MiB 0.0 MiB train_pref.append(o1)\n",
" 256 145.1 MiB 0.0 MiB val_pref.append(o2)\n",
" 257 145.1 MiB 0.0 MiB test_pref.append(o3)\n",
" 258 145.1 MiB 0.0 MiB pool.close()\n",
" 259 145.1 MiB 0.0 MiB pool.join()\n",
" 260 \n",
" 261 # # ---- use pool.map to parallel. ----\n",
" 262 # pool = Pool(n_jobs)\n",
" 263 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y[0:250], model_type)\n",
" 264 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n",
" 265 # train_pref = [item[0] for item in result_perf]\n",
" 266 # val_pref = [item[1] for item in result_perf]\n",
" 267 # test_pref = [item[2] for item in result_perf]\n",
" 268 \n",
" 269 # # ---- direct running, normally use a single CPU core. ----\n",
" 270 # train_pref = []\n",
" 271 # val_pref = []\n",
" 272 # test_pref = []\n",
" 273 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n",
" 274 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n",
" 275 # train_pref.append(o1)\n",
" 276 # val_pref.append(o2)\n",
" 277 # test_pref.append(o3)\n",
" 278 # print()\n",
" 279 \n",
" 280 145.1 MiB 0.0 MiB print()\n",
" 281 145.1 MiB 0.0 MiB print('4. Getting final performance...')\n",
" 282 145.1 MiB 0.0 MiB str_fw += '\\nIII. Performance.\\n\\n'\n",
" 283 # averages and confidences of performances on outer trials for each combination of parameters\n",
" 284 145.1 MiB 0.0 MiB average_train_scores = np.mean(train_pref, axis=0)\n",
" 285 # print('val_pref: ', val_pref[0][0])\n",
" 286 145.1 MiB 0.0 MiB average_val_scores = np.mean(val_pref, axis=0)\n",
" 287 # print('test_pref: ', test_pref[0][0])\n",
" 288 145.1 MiB 0.0 MiB average_perf_scores = np.mean(test_pref, axis=0)\n",
" 289 # sample std is used here\n",
" 290 145.1 MiB 0.0 MiB std_train_scores = np.std(train_pref, axis=0, ddof=1)\n",
" 291 145.1 MiB 0.0 MiB std_val_scores = np.std(val_pref, axis=0, ddof=1)\n",
" 292 145.1 MiB 0.0 MiB std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n",
" 293 \n",
" 294 145.1 MiB 0.0 MiB if model_type == 'regression':\n",
" 295 145.1 MiB 0.0 MiB best_val_perf = np.amin(average_val_scores)\n",
" 296 else:\n",
" 297 best_val_perf = np.amax(average_val_scores)\n",
" 298 # print('average_val_scores: ', average_val_scores)\n",
" 299 # print('best_val_perf: ', best_val_perf)\n",
" 300 # print()\n",
" 301 145.1 MiB 0.0 MiB best_params_index = np.where(average_val_scores == best_val_perf)\n",
" 302 # find smallest val std with best val perf.\n",
" 303 best_val_stds = [\n",
" 304 145.1 MiB 0.0 MiB std_val_scores[value][best_params_index[1][idx]]\n",
" 305 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n",
" 306 ]\n",
" 307 145.1 MiB 0.0 MiB min_val_std = np.amin(best_val_stds)\n",
" 308 145.1 MiB 0.0 MiB best_params_index = np.where(std_val_scores == min_val_std)\n",
" 309 best_params_out = [\n",
" 310 145.1 MiB 0.0 MiB param_list_pre_revised[i] for i in best_params_index[0]\n",
" 311 ]\n",
" 312 145.1 MiB 0.0 MiB best_params_in = [param_list[i] for i in best_params_index[1]]\n",
" 313 145.1 MiB 0.0 MiB print('best_params_out: ', best_params_out)\n",
" 314 145.1 MiB 0.0 MiB print('best_params_in: ', best_params_in)\n",
" 315 145.1 MiB 0.0 MiB print()\n",
" 316 145.1 MiB 0.0 MiB print('best_val_perf: ', best_val_perf)\n",
" 317 145.1 MiB 0.0 MiB print('best_val_std: ', min_val_std)\n",
" 318 145.1 MiB 0.0 MiB str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n",
" 319 145.1 MiB 0.0 MiB str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n",
" 320 145.1 MiB 0.0 MiB str_fw += 'best_val_perf: %s\\n' % best_val_perf\n",
" 321 145.1 MiB 0.0 MiB str_fw += 'best_val_std: %s\\n' % min_val_std\n",
" 322 \n",
" 323 # print(best_params_index)\n",
" 324 # print(best_params_index[0])\n",
" 325 # print(average_perf_scores)\n",
" 326 final_performance = [\n",
" 327 145.1 MiB 0.0 MiB average_perf_scores[value][best_params_index[1][idx]]\n",
" 328 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n",
" 329 ]\n",
" 330 final_confidence = [\n",
" 331 145.1 MiB 0.0 MiB std_perf_scores[value][best_params_index[1][idx]]\n",
" 332 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n",
" 333 ]\n",
" 334 145.1 MiB 0.0 MiB print('final_performance: ', final_performance)\n",
" 335 145.1 MiB 0.0 MiB print('final_confidence: ', final_confidence)\n",
" 336 145.1 MiB 0.0 MiB str_fw += 'final_performance: %s\\n' % final_performance\n",
" 337 145.1 MiB 0.0 MiB str_fw += 'final_confidence: %s\\n' % final_confidence\n",
" 338 train_performance = [\n",
" 339 145.1 MiB 0.0 MiB average_train_scores[value][best_params_index[1][idx]]\n",
" 340 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n",
" 341 ]\n",
" 342 train_std = [\n",
" 343 145.1 MiB 0.0 MiB std_train_scores[value][best_params_index[1][idx]]\n",
" 344 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n",
" 345 ]\n",
" 346 145.1 MiB 0.0 MiB print('train_performance: %s' % train_performance)\n",
" 347 145.1 MiB 0.0 MiB print('train_std: ', train_std)\n",
" 348 145.1 MiB 0.0 MiB str_fw += 'train_performance: %s\\n' % train_performance\n",
" 349 145.1 MiB 0.0 MiB str_fw += 'train_std: %s\\n\\n' % train_std\n",
" 350 \n",
" 351 145.1 MiB 0.0 MiB print()\n",
" 352 145.1 MiB 0.0 MiB tt_total = time.time() - tts # training time for all hyper-parameters\n",
" 353 145.1 MiB 0.0 MiB average_gram_matrix_time = np.mean(gram_matrix_time)\n",
" 354 145.1 MiB 0.0 MiB std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n",
" 355 best_gram_matrix_time = [\n",
" 356 145.1 MiB 0.0 MiB gram_matrix_time[i] for i in best_params_index[0]\n",
" 357 ]\n",
" 358 145.1 MiB 0.0 MiB ave_bgmt = np.mean(best_gram_matrix_time)\n",
" 359 145.1 MiB 0.0 MiB std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n",
" 360 145.1 MiB 0.0 MiB print(\n",
" 361 145.1 MiB 0.0 MiB 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n",
" 362 145.1 MiB 0.0 MiB .format(average_gram_matrix_time, std_gram_matrix_time))\n",
" 363 145.1 MiB 0.0 MiB print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n",
" 364 145.1 MiB 0.0 MiB ave_bgmt, std_bgmt))\n",
" 365 145.1 MiB 0.0 MiB print(\n",
" 366 145.1 MiB 0.0 MiB 'total training time with all hyper-param choices: {:.2f}s'.format(\n",
" 367 145.1 MiB 0.0 MiB tt_total))\n",
" 368 145.1 MiB 0.0 MiB str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n",
" 369 145.1 MiB 0.0 MiB str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n",
" 370 145.1 MiB 0.0 MiB str_fw += 'total training time with all hyper-param choices: {:.2f}s\\n\\n'.format(tt_total)\n",
" 371 \n",
" 372 # # save results to file\n",
" 373 # np.savetxt(results_name_pre + 'average_train_scores.dt',\n",
" 374 # average_train_scores)\n",
" 375 # np.savetxt(results_name_pre + 'average_val_scores', average_val_scores)\n",
" 376 # np.savetxt(results_name_pre + 'average_perf_scores.dt',\n",
" 377 # average_perf_scores)\n",
" 378 # np.savetxt(results_name_pre + 'std_train_scores.dt', std_train_scores)\n",
" 379 # np.savetxt(results_name_pre + 'std_val_scores.dt', std_val_scores)\n",
" 380 # np.savetxt(results_name_pre + 'std_perf_scores.dt', std_perf_scores)\n",
" 381 \n",
" 382 # np.save(results_name_pre + 'best_params_index', best_params_index)\n",
" 383 # np.save(results_name_pre + 'best_params_pre.dt', best_params_out)\n",
" 384 # np.save(results_name_pre + 'best_params_in.dt', best_params_in)\n",
" 385 # np.save(results_name_pre + 'best_val_perf.dt', best_val_perf)\n",
" 386 # np.save(results_name_pre + 'best_val_std.dt', best_val_std)\n",
" 387 # np.save(results_name_pre + 'final_performance.dt', final_performance)\n",
" 388 # np.save(results_name_pre + 'final_confidence.dt', final_confidence)\n",
" 389 # np.save(results_name_pre + 'train_performance.dt', train_performance)\n",
" 390 # np.save(results_name_pre + 'train_std.dt', train_std)\n",
" 391 \n",
" 392 # np.save(results_name_pre + 'gram_matrix_time.dt', gram_matrix_time)\n",
" 393 # np.save(results_name_pre + 'average_gram_matrix_time.dt',\n",
" 394 # average_gram_matrix_time)\n",
" 395 # np.save(results_name_pre + 'std_gram_matrix_time.dt',\n",
" 396 # std_gram_matrix_time)\n",
" 397 # np.save(results_name_pre + 'best_gram_matrix_time.dt',\n",
" 398 # best_gram_matrix_time)\n",
" 399 \n",
" 400 # print out as table.\n",
" 401 145.1 MiB 0.0 MiB from collections import OrderedDict\n",
" 402 145.1 MiB 0.0 MiB from tabulate import tabulate\n",
" 403 145.1 MiB 0.0 MiB table_dict = {}\n",
" 404 145.1 MiB 0.0 MiB if model_type == 'regression':\n",
" 405 145.1 MiB 0.0 MiB for param_in in param_list:\n",
" 406 145.1 MiB 0.0 MiB param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n",
" 407 else:\n",
" 408 for param_in in param_list:\n",
" 409 param_in['C'] = '{:.2e}'.format(param_in['C'])\n",
" 410 145.1 MiB 0.0 MiB table_dict['params'] = [{**param_out, **param_in}\n",
" 411 145.1 MiB 0.0 MiB for param_in in param_list for param_out in param_list_pre_revised]\n",
" 412 table_dict['gram_matrix_time'] = [\n",
" 413 145.1 MiB 0.0 MiB '{:.2f}'.format(gram_matrix_time[index_out])\n",
" 414 145.1 MiB 0.0 MiB for param_in in param_list\n",
" 415 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n",
" 416 ]\n",
" 417 table_dict['valid_perf'] = [\n",
" 418 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n",
" 419 std_val_scores[index_out][index_in])\n",
" 420 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n",
" 421 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n",
" 422 ]\n",
" 423 table_dict['test_perf'] = [\n",
" 424 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n",
" 425 std_perf_scores[index_out][index_in])\n",
" 426 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n",
" 427 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n",
" 428 ]\n",
" 429 table_dict['train_perf'] = [\n",
" 430 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n",
" 431 std_train_scores[index_out][index_in])\n",
" 432 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n",
" 433 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n",
" 434 ]\n",
" 435 keyorder = [\n",
" 436 145.1 MiB 0.0 MiB 'params', 'train_perf', 'valid_perf', 'test_perf',\n",
" 437 145.1 MiB 0.0 MiB 'gram_matrix_time'\n",
" 438 ]\n",
" 439 145.1 MiB 0.0 MiB print()\n",
" 440 145.1 MiB 0.0 MiB tb_print = tabulate(\n",
" 441 145.1 MiB 0.0 MiB OrderedDict(\n",
" 442 145.1 MiB 0.0 MiB sorted(table_dict.items(),\n",
" 443 145.1 MiB 0.0 MiB key=lambda i: keyorder.index(i[0]))),\n",
" 444 145.1 MiB 0.0 MiB headers='keys')\n",
" 445 # print(tb_print)\n",
" 446 145.1 MiB 0.0 MiB str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n",
" 447 \n",
" 448 # read gram matrices from file.\n",
" 449 else: \n",
" 450 # Grid of parameters with a discrete number of values for each.\n",
" 451 # param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n",
" 452 param_list = list(ParameterGrid(param_grid))\n",
" 453 \n",
" 454 # read gram matrices from file.\n",
" 455 print()\n",
" 456 print('2. Reading gram matrices from file...')\n",
" 457 str_fw += '\\nII. Gram matrices.\\n\\nGram matrices are read from file, see last log for detail.\\n'\n",
" 458 gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')\n",
" 459 gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed\n",
" 460 gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices\n",
" 461 param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones\n",
" 462 y = gmfile['y'].tolist()\n",
" 463 \n",
" 464 tts = time.time() # start training time\n",
" 465 # nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN) \n",
" 466 print(\n",
" 467 '3. Fitting and predicting using nested cross validation. This could really take a while...'\n",
" 468 )\n",
" 469 \n",
" 470 # ---- use pool.imap_unordered to parallel and track progress. ----\n",
" 471 def init_worker(gms_toshare):\n",
" 472 global G_gms\n",
" 473 G_gms = gms_toshare\n",
" 474 \n",
" 475 pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n",
" 476 trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n",
" 477 train_pref = []\n",
" 478 val_pref = []\n",
" 479 test_pref = []\n",
" 480 chunksize = 1\n",
" 481 for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n",
" 482 train_pref.append(o1)\n",
" 483 val_pref.append(o2)\n",
" 484 test_pref.append(o3)\n",
" 485 pool.close()\n",
" 486 pool.join()\n",
" 487 \n",
" 488 # # ---- use pool.map to parallel. ----\n",
" 489 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n",
" 490 # train_pref = [item[0] for item in result_perf]\n",
" 491 # val_pref = [item[1] for item in result_perf]\n",
" 492 # test_pref = [item[2] for item in result_perf]\n",
" 493 \n",
" 494 # # ---- use joblib.Parallel to parallel and track progress. ----\n",
" 495 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)\n",
" 496 # result_perf = Parallel(n_jobs=n_jobs, verbose=10)(delayed(trial_do_partial)(trial) for trial in range(NUM_TRIALS))\n",
" 497 # train_pref = [item[0] for item in result_perf]\n",
" 498 # val_pref = [item[1] for item in result_perf]\n",
" 499 # test_pref = [item[2] for item in result_perf]\n",
" 500 \n",
" 501 # # ---- direct running, normally use a single CPU core. ----\n",
" 502 # train_pref = []\n",
" 503 # val_pref = []\n",
" 504 # test_pref = []\n",
" 505 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n",
" 506 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n",
" 507 # train_pref.append(o1)\n",
" 508 # val_pref.append(o2)\n",
" 509 # test_pref.append(o3)\n",
" 510 \n",
" 511 print()\n",
" 512 print('4. Getting final performance...')\n",
" 513 str_fw += '\\nIII. Performance.\\n\\n'\n",
" 514 # averages and confidences of performances on outer trials for each combination of parameters\n",
" 515 average_train_scores = np.mean(train_pref, axis=0)\n",
" 516 average_val_scores = np.mean(val_pref, axis=0)\n",
" 517 average_perf_scores = np.mean(test_pref, axis=0)\n",
" 518 # sample std is used here\n",
" 519 std_train_scores = np.std(train_pref, axis=0, ddof=1)\n",
" 520 std_val_scores = np.std(val_pref, axis=0, ddof=1)\n",
" 521 std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n",
" 522 \n",
" 523 if model_type == 'regression':\n",
" 524 best_val_perf = np.amin(average_val_scores)\n",
" 525 else:\n",
" 526 best_val_perf = np.amax(average_val_scores)\n",
" 527 best_params_index = np.where(average_val_scores == best_val_perf)\n",
" 528 # find smallest val std with best val perf.\n",
" 529 best_val_stds = [\n",
" 530 std_val_scores[value][best_params_index[1][idx]]\n",
" 531 for idx, value in enumerate(best_params_index[0])\n",
" 532 ]\n",
" 533 min_val_std = np.amin(best_val_stds)\n",
" 534 best_params_index = np.where(std_val_scores == min_val_std)\n",
" 535 best_params_out = [\n",
" 536 param_list_pre_revised[i] for i in best_params_index[0]\n",
" 537 ]\n",
" 538 best_params_in = [param_list[i] for i in best_params_index[1]]\n",
" 539 print('best_params_out: ', best_params_out)\n",
" 540 print('best_params_in: ', best_params_in)\n",
" 541 print()\n",
" 542 print('best_val_perf: ', best_val_perf)\n",
" 543 print('best_val_std: ', min_val_std)\n",
" 544 str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n",
" 545 str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n",
" 546 str_fw += 'best_val_perf: %s\\n' % best_val_perf\n",
" 547 str_fw += 'best_val_std: %s\\n' % min_val_std\n",
" 548 \n",
" 549 final_performance = [\n",
" 550 average_perf_scores[value][best_params_index[1][idx]]\n",
" 551 for idx, value in enumerate(best_params_index[0])\n",
" 552 ]\n",
" 553 final_confidence = [\n",
" 554 std_perf_scores[value][best_params_index[1][idx]]\n",
" 555 for idx, value in enumerate(best_params_index[0])\n",
" 556 ]\n",
" 557 print('final_performance: ', final_performance)\n",
" 558 print('final_confidence: ', final_confidence)\n",
" 559 str_fw += 'final_performance: %s\\n' % final_performance\n",
" 560 str_fw += 'final_confidence: %s\\n' % final_confidence\n",
" 561 train_performance = [\n",
" 562 average_train_scores[value][best_params_index[1][idx]]\n",
" 563 for idx, value in enumerate(best_params_index[0])\n",
" 564 ]\n",
" 565 train_std = [\n",
" 566 std_train_scores[value][best_params_index[1][idx]]\n",
" 567 for idx, value in enumerate(best_params_index[0])\n",
" 568 ]\n",
" 569 print('train_performance: %s' % train_performance)\n",
" 570 print('train_std: ', train_std)\n",
" 571 str_fw += 'train_performance: %s\\n' % train_performance\n",
" 572 str_fw += 'train_std: %s\\n\\n' % train_std\n",
" 573 \n",
" 574 print()\n",
" 575 average_gram_matrix_time = np.mean(gram_matrix_time)\n",
" 576 std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n",
" 577 best_gram_matrix_time = [\n",
" 578 gram_matrix_time[i] for i in best_params_index[0]\n",
" 579 ]\n",
" 580 ave_bgmt = np.mean(best_gram_matrix_time)\n",
" 581 std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n",
" 582 print(\n",
" 583 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n",
" 584 .format(average_gram_matrix_time, std_gram_matrix_time))\n",
" 585 print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n",
" 586 ave_bgmt, std_bgmt))\n",
" 587 tt_poster = time.time() - tts # training time with hyper-param choices who did not participate in calculation of gram matrices\n",
" 588 print(\n",
" 589 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s'.format(\n",
" 590 tt_poster))\n",
" 591 print('total training time with all hyper-param choices: {:.2f}s'.format(\n",
" 592 tt_poster + np.sum(gram_matrix_time)))\n",
" 593 # str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n",
" 594 # str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n",
" 595 str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\\n\\n'.format(tt_poster)\n",
" 596 \n",
" 597 # print out as table.\n",
" 598 from collections import OrderedDict\n",
" 599 from tabulate import tabulate\n",
" 600 table_dict = {}\n",
" 601 if model_type == 'regression':\n",
" 602 for param_in in param_list:\n",
" 603 param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n",
" 604 else:\n",
" 605 for param_in in param_list:\n",
" 606 param_in['C'] = '{:.2e}'.format(param_in['C'])\n",
" 607 table_dict['params'] = [{**param_out, **param_in}\n",
" 608 for param_in in param_list for param_out in param_list_pre_revised]\n",
" 609 # table_dict['gram_matrix_time'] = [\n",
" 610 # '{:.2f}'.format(gram_matrix_time[index_out])\n",
" 611 # for param_in in param_list\n",
" 612 # for index_out, _ in enumerate(param_list_pre_revised)\n",
" 613 # ]\n",
" 614 table_dict['valid_perf'] = [\n",
" 615 '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n",
" 616 std_val_scores[index_out][index_in])\n",
" 617 for index_in, _ in enumerate(param_list)\n",
" 618 for index_out, _ in enumerate(param_list_pre_revised)\n",
" 619 ]\n",
" 620 table_dict['test_perf'] = [\n",
" 621 '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n",
" 622 std_perf_scores[index_out][index_in])\n",
" 623 for index_in, _ in enumerate(param_list)\n",
" 624 for index_out, _ in enumerate(param_list_pre_revised)\n",
" 625 ]\n",
" 626 table_dict['train_perf'] = [\n",
" 627 '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n",
" 628 std_train_scores[index_out][index_in])\n",
" 629 for index_in, _ in enumerate(param_list)\n",
" 630 for index_out, _ in enumerate(param_list_pre_revised)\n",
" 631 ]\n",
" 632 keyorder = [\n",
" 633 'params', 'train_perf', 'valid_perf', 'test_perf'\n",
" 634 ]\n",
" 635 print()\n",
" 636 tb_print = tabulate(\n",
" 637 OrderedDict(\n",
" 638 sorted(table_dict.items(),\n",
" 639 key=lambda i: keyorder.index(i[0]))),\n",
" 640 headers='keys')\n",
" 641 # print(tb_print)\n",
" 642 str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n",
" 643 \n",
" 644 # open file to save all results for this dataset.\n",
" 645 if not os.path.exists(results_dir):\n",
" 646 os.makedirs(results_dir)\n",
" 647 \n",
" 648 # open file to save all results for this dataset.\n",
" 649 145.1 MiB 0.0 MiB if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):\n",
" 650 with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:\n",
" 651 f.write(str_fw)\n",
" 652 else:\n",
" 653 145.1 MiB 0.0 MiB with open(results_dir + '/' + ds_name + '.output.txt', 'r+') as f:\n",
" 654 145.1 MiB 0.0 MiB content = f.read()\n",
" 655 145.1 MiB 0.0 MiB f.seek(0, 0)\n",
" 656 145.1 MiB 0.0 MiB f.write(str_fw + '\\n\\n\\n' + content)\n",
"\n",
"\n",
"\n"
]
}
],
"source": [
"import functools\n",
"from libs import *\n",
"import multiprocessing\n",
"\n",
"from pygraph.kernels.spKernel import spkernel\n",
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n",
"#from pygraph.utils.model_selection_precomputed import trial_do\n",
"\n",
"dslist = [\n",
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n",
" 'task': 'regression'}, # node symb\n",
"# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n",
"# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n",
"# # contains single node graph, node symb\n",
"# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n",
"# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n",
"# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n",
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n",
"# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n",
"# # node nsymb\n",
"# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n",
"# # node symb/nsymb\n",
"# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n",
"# # node/edge symb\n",
"# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n",
"# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n",
"\n",
" # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n",
" # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n",
" # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n",
" # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n",
" #\n",
" # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n",
" # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n",
" # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n",
" # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n",
" # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n",
"\n",
" # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n",
" # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n",
" # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n",
" # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n",
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n",
" # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n",
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n",
" # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n",
" # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n",
"\n",
" # # not working below\n",
" # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n",
" # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n",
" # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n",
" # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n",
"]\n",
"estimator = spkernel\n",
"mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n",
"param_grid_precomputed = {'node_kernels': [\n",
" {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n",
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n",
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n",
"\n",
"for ds in dslist:\n",
" print()\n",
" print(ds['name'])\n",
" model_selection_for_precomputed_kernel(\n",
" ds['dataset'],\n",
" estimator,\n",
" param_grid_precomputed,\n",
" (param_grid[1] if ('task' in ds and ds['task']\n",
" == 'regression') else param_grid[0]),\n",
" (ds['task'] if 'task' in ds else 'classification'),\n",
" NUM_TRIALS=30,\n",
" datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n",
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n",
" ds_name=ds['name'],\n",
" n_jobs=multiprocessing.cpu_count(),\n",
" read_gm_from_file=False)\n",
" print()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
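
A note on the pattern in the cell above: init_worker binds the Gram matrices to a module-level global (G_gms) once per worker process, so the pool.imap_unordered tasks no longer carry the big read-only matrices in every pickled task. A minimal self-contained sketch of the same pattern; the names trial and gram_matrices are illustrative stand-ins, not the toolbox's API:

import numpy as np
from multiprocessing import Pool

def init_worker(gms_toshare):
    # runs once in each worker; binds the big object to a module global
    global G_gms
    G_gms = gms_toshare

def trial(i):
    # read-only access inside the worker; the task payload is just `i`
    return i, float(G_gms[0].trace())

if __name__ == '__main__':
    gram_matrices = [np.eye(1000)]  # stand-in for the real Gram matrices
    with Pool(processes=4, initializer=init_worker,
              initargs=(gram_matrices,)) as pool:
        for i, tr in pool.imap_unordered(trial, range(8), chunksize=2):
            print(i, tr)

With the fork start method the matrices reach each worker once at pool start-up; every subsequent task ships only its trial index.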

+ 28114
- 1487
notebooks/plot_all_graphs.ipynb
File diff suppressed because it is too large
View File


+ 3
- 4
notebooks/run_commonwalkkernel.py View File

@@ -6,7 +6,6 @@ Created on Fri Sep 28 17:01:13 2018
@author: ljia
"""

-import functools
from libs import *
import multiprocessing
from sklearn.metrics.pairwise import rbf_kernel
@@ -61,10 +60,10 @@ dslist = [
# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = commonwalkkernel
-mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
param_grid_precomputed = [{'compute_method': ['geo'],
-'weight': np.logspace(0, -10, num=11, base=10)},
-{'compute_method': ['exp'], 'weight': range(0, 10)}]
+'weight': np.linspace(0.01, 0.15, 15)},
+# 'weight': np.logspace(-1, -10, num=10, base=10)},
+{'compute_method': ['exp'], 'weight': range(0, 15)}]
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]



+ 76
- 0
notebooks/run_degree_differs_cw.py View File

@@ -0,0 +1,76 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 17:16:19 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing

dslist = [
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.commonWalkKernel import commonwalkkernel
estimator = commonwalkkernel
param_grid_precomputed = [{'compute_method': ['geo'],
'weight': np.linspace(0.01, 0.15, 15)},
# 'weight': np.logspace(-1, -10, num=10, base=10)},
{'compute_method': ['exp'], 'weight': range(0, 15)}]
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
degree_list = [np.mean(list(dict(g.degree()).values())) for g in Gn]
idx_sorted = np.argsort(degree_list)
degree_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
ave_degree = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
aved = np.mean(degree_list[len_1piece * piece:len_1piece * (piece + 1)])
ave_degree.append(aved)
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print('corresponding average vertex degrees are', ave_degree)
print()

+ 78
- 0
notebooks/run_degree_differs_ma.py View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 17:43:38 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing

dslist = [
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.marginalizedKernel import marginalizedkernel
estimator = marginalizedkernel
#param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.3, 3),
# 'n_iteration': np.linspace(1, 1, 1),
param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9),
'n_iteration': np.linspace(1, 19, 7),
'remove_totters': [False]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
degree_list = [np.mean(list(dict(g.degree()).values())) for g in Gn]
idx_sorted = np.argsort(degree_list)
degree_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
ave_degree = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
aved = np.mean(degree_list[len_1piece * piece:len_1piece * (piece + 1)])
ave_degree.append(aved)
# print(np.mean([nx.number_of_nodes(g) for g in Gn_p]))
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print('corresponding average vertex degrees are', ave_degree)
print()

+ 102
- 0
notebooks/run_degree_differs_rw.py View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 17:48:06 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing
import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

dslist = [
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.randomWalkKernel import randomwalkkernel
estimator = randomwalkkernel
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]
ave_time = {}
std_time = {}
for compute_method in ['sylvester', 'conjugate', 'fp', 'spectral']:
if compute_method == 'sylvester':
param_grid_precomputed = {'compute_method': ['sylvester'],
# 'weight': np.linspace(0.01, 0.10, 10)}
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'conjugate':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['conjugate'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'fp':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['fp'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-3, -10, num=8, base=10)}
elif compute_method == 'spectral':
param_grid_precomputed = {'compute_method': ['spectral'],
'weight': np.logspace(-1, -10, num=10, base=10),
'sub_kernel': ['geo', 'exp']}
_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
ave_time[compute_method] = average_gram_matrix_time
std_time[compute_method] = std_gram_matrix_time
print()
return ave_time, std_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
degree_list = [np.mean(list(dict(g.degree()).values())) for g in Gn]
idx_sorted = np.argsort(degree_list)
degree_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
ave_degree = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
aved = np.mean(degree_list[len_1piece * piece:len_1piece * (piece + 1)])
ave_degree.append(aved)
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print('corresponding average vertex degrees are', ave_degree)
print()

+ 77
- 0
notebooks/run_degree_differs_sp.py View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 17:46:02 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import functools
import multiprocessing
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

dslist = [
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.spKernel import spkernel
estimator = spkernel
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels': [
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
degree_list = [np.mean(list(dict(g.degree()).values())) for g in Gn]
idx_sorted = np.argsort(degree_list)
degree_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
ave_degree = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
aved = np.mean(degree_list[len_1piece * piece:len_1piece * (piece + 1)])
ave_degree.append(aved)
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print('corresponding average vertex degrees are', ave_degree)
print()
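
All of these timing scripts pass the same {'symb': ..., 'nsymb': ..., 'mix': ...} dictionaries as node_kernels (and, for the structural shortest-path variants, edge_kernels): a Dirac kernel for symbolic labels, a Gaussian for non-symbolic vector labels, and their product when both are present. A hedged sketch of plausible definitions; the exact implementations in pygraph.utils.kernels may differ:

import functools
import numpy as np

def deltakernel(x, y):
    return float(x == y)  # Dirac kernel on symbolic labels

def gaussiankernel(x, y, gamma=1.0):
    d = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return float(np.exp(-gamma * np.dot(d, d)))

def kernelproduct(k1, k2, s1, s2, v1, v2):
    # combine the symbolic and non-symbolic comparisons of one node pair
    return k1(s1, s2) * k2(v1, v2)

mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
node_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
print(node_kernels['mix']('C', 'C', [0.1, 0.2], [0.1, 0.3]))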

+ 79
- 0
notebooks/run_degree_differs_ssp.py View File

@@ -0,0 +1,79 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 17:46:41 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import functools
import multiprocessing
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

dslist = [
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.structuralspKernel import structuralspkernel
estimator = structuralspkernel
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
degree_list = [np.mean(list(dict(g.degree()).values())) for g in Gn]
idx_sorted = np.argsort(degree_list)
degree_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
ave_degree = []
for piece in range(1, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
aved = np.mean(degree_list[len_1piece * piece:len_1piece * (piece + 1)])
ave_degree.append(aved)
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print('corresponding average vertex degrees are', ave_degree)
print()

+ 74
- 0
notebooks/run_degree_differs_uhp.py View File

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 17:47:22 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing

dslist = [
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.untilHPathKernel import untilhpathkernel
estimator = untilhpathkernel
param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
'k_func': ['MinMax', 'tanimoto']} # ['MinMax']}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
degree_list = [np.mean(list(dict(g.degree()).values())) for g in Gn]
idx_sorted = np.argsort(degree_list)
degree_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
ave_degree = []
for piece in range(1, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
aved = np.mean(degree_list[len_1piece * piece:len_1piece * (piece + 1)])
ave_degree.append(aved)
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print('corresponding average vertex degrees are', ave_degree)
print()

+ 15
- 13
notebooks/run_marginalizedkernel.py View File

@@ -12,17 +12,17 @@ import multiprocessing
from pygraph.kernels.marginalizedKernel import marginalizedkernel

dslist = [
-{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
- 'task': 'regression'}, # node symb
-{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
- 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
-# contains single node graph, node symb
-{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
-{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
-{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
- 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
-{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-# node nsymb
+# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+# 'task': 'regression'}, # node symb
+# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
+# # contains single node graph, node symb
+# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
+# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
+# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
+# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
+# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+# # node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
@@ -58,8 +58,10 @@ dslist = [
# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = marginalizedkernel
+#param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.3, 3),
+# 'n_iteration': np.linspace(1, 1, 1),
param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9),
-'n_iteration': np.linspace(2, 20, 10),
+'n_iteration': np.linspace(1, 19, 7),
'remove_totters': [False]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]
@@ -79,5 +81,5 @@ for ds in dslist:
extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
-read_gm_from_file=False)
+read_gm_from_file=True)
print()
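
The switch to read_gm_from_file=True exercises the newly completed branch of model_selection_for_precomputed_kernel that cross-validates on Gram matrices loaded from disk instead of recomputing them. A hedged sketch of what that amounts to using scikit-learn's precomputed-kernel support; the file paths below are hypothetical, not the toolbox's exact naming scheme:

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

gram = np.load('results/marginalizedkernel.ENZYMES.gm.npy')  # hypothetical path
y = np.load('results/marginalizedkernel.ENZYMES.y.npy')      # hypothetical path

clf = SVC(kernel='precomputed', C=100)
# scikit-learn slices both axes of the square kernel matrix per CV split,
# so the rows/columns of `gram` must stay aligned with `y`
scores = cross_val_score(clf, gram, y, cv=5)
print('accuracy: {:.2f}±{:.2f}'.format(scores.mean(), scores.std(ddof=1)))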

+ 1
- 1
notebooks/run_randomwalkkernel.ipynb View File

@@ -1734,7 +1734,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.6.6"
}
},
"nbformat": 4,


+ 110
- 0
notebooks/run_randomwalkkernel.py View File

@@ -0,0 +1,110 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 22 17:02:28 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing

from pygraph.kernels.randomWalkKernel import randomwalkkernel
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

import numpy as np


dslist = [
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
'task': 'regression'}, # node symb
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# contains single node graph, node symb
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
# # node/edge symb
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb

# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
# # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
# # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
#
# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb, missing values
# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb, missing values
# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb

# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb
# {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
# 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb

# # not working below
# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = randomwalkkernel
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
print()
print(ds['name'])
for compute_method in ['sylvester', 'conjugate', 'fp', 'spectral']:
if compute_method == 'sylvester':
param_grid_precomputed = {'compute_method': ['sylvester'],
# 'weight': np.linspace(0.01, 0.10, 10)}
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'conjugate':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['conjugate'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'fp':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['fp'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-3, -10, num=8, base=10)}
elif compute_method == 'spectral':
param_grid_precomputed = {'compute_method': ['spectral'],
'weight': np.logspace(-1, -10, num=10, base=10),
'sub_kernel': ['geo', 'exp']}
model_selection_for_precomputed_kernel(
ds['dataset'],
estimator,
param_grid_precomputed,
(param_grid[1] if ('task' in ds and ds['task']
== 'regression') else param_grid[0]),
(ds['task'] if 'task' in ds else 'classification'),
NUM_TRIALS=30,
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)
print()
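
For reference, the four compute_method options ('sylvester', 'conjugate', 'fp', 'spectral') are different routes to the same quantity: the geometric random walk kernel on the direct (tensor) product graph, k(G1, G2) = q^T (I - lambda * Ax)^(-1) p. A naive dense solve makes that target concrete, though it is only feasible for small graphs, which is exactly why the four methods exist. Sketch under the assumption of unlabeled graphs and uniform start/stop distributions:

import numpy as np
import networkx as nx

def geo_rw_kernel(g1, g2, lam=0.01):
    a1 = nx.to_numpy_array(g1)
    a2 = nx.to_numpy_array(g2)
    ax = np.kron(a1, a2)     # adjacency matrix of the direct product graph
    n = ax.shape[0]
    p = np.full(n, 1.0 / n)  # uniform start distribution
    # solve (I - lam*Ax) x = p rather than inverting; lam must be smaller
    # than 1 / spectral_radius(Ax) for the geometric series to converge
    x = np.linalg.solve(np.eye(n) - lam * ax, p)
    return x.sum()           # uniform stop distribution q, up to scaling

print(geo_rw_kernel(nx.path_graph(4), nx.cycle_graph(5)))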

+ 70
- 0
notebooks/run_rwalk_symonly.py View File

@@ -0,0 +1,70 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:56:44 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing

from pygraph.kernels.rwalk_sym import randomwalkkernel
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

import numpy as np


dslist = [
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]
estimator = randomwalkkernel
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
print()
print(ds['name'])
for compute_method in ['conjugate', 'fp']:
if compute_method == 'sylvester':
param_grid_precomputed = {'compute_method': ['sylvester'],
# 'weight': np.linspace(0.01, 0.10, 10)}
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'conjugate':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['conjugate'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'fp':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['fp'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-3, -10, num=8, base=10)}
elif compute_method == 'spectral':
param_grid_precomputed = {'compute_method': ['spectral'],
'weight': np.logspace(-1, -10, num=10, base=10),
'sub_kernel': ['geo', 'exp']}
model_selection_for_precomputed_kernel(
ds['dataset'],
estimator,
param_grid_precomputed,
(param_grid[1] if ('task' in ds and ds['task']
== 'regression') else param_grid[0]),
(ds['task'] if 'task' in ds else 'classification'),
NUM_TRIALS=30,
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)
print()

+ 61
- 0
notebooks/run_sp_symonly.py View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 21 17:59:28 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing

from pygraph.kernels.sp_sym import spkernel
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
#from pygraph.utils.model_selection_precomputed import trial_do

dslist = [
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb

# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
# # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
# # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
#
# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb

# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
]
estimator = spkernel
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels': [
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
print()
print(ds['name'])
model_selection_for_precomputed_kernel(
ds['dataset'],
estimator,
param_grid_precomputed,
(param_grid[1] if ('task' in ds and ds['task']
== 'regression') else param_grid[0]),
(ds['task'] if 'task' in ds else 'classification'),
NUM_TRIALS=30,
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)
print()

+ 2
- 2
notebooks/run_spkernel.py View File

@@ -21,7 +21,7 @@ dslist = [
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
-# node/edge symb
+# # node/edge symb
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb

@@ -75,4 +75,4 @@ for ds in dslist:
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)
-print()
+print()

+ 47
- 0
notebooks/run_ssp_symonly.py View File

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:40:52 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing

from pygraph.kernels.ssp_sym import structuralspkernel
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

dslist = [
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]
estimator = structuralspkernel
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
print()
print(ds['name'])
model_selection_for_precomputed_kernel(
ds['dataset'],
estimator,
param_grid_precomputed,
(param_grid[1] if ('task' in ds and ds['task']
== 'regression') else param_grid[0]),
(ds['task'] if 'task' in ds else 'classification'),
NUM_TRIALS=30,
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)
print()

+ 308
- 0
notebooks/run_structuralspkernel.ipynb View File

@@ -0,0 +1,308 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"MAO\n",
"\n",
"--- This is a classification problem ---\n",
"\n",
"\n",
"1. Loading dataset from file...\n",
"\n",
"2. Calculating gram matrices. This could take a while...\n",
"\n",
" None edge weight specified. Set all weight to 1.\n",
"\n",
"getting shortest paths: 68it [00:00, 629.46it/s]\n",
"calculating kernels: 2346it [00:22, 102.31it/s]\n",
"\n",
" --- shortest path kernel matrix of size 68 built in 23.390946626663208 seconds ---\n",
"\n",
"the gram matrix with parameters {'edge_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'n_jobs': 8} is: \n",
"\n",
"1 gram matrices are calculated, 0 of which are ignored.\n",
"\n",
"3. Fitting and predicting using nested cross validation. This could really take a while...\n",
"cross validation: 0%| | 0/30 [00:00<?, ?it/s]0 0\n",
"params_in: {'C': 1e-10}\n",
"0 1\n",
"params_in: {'C': 3.1622776601683795e-10}\n",
"0 2\n",
"params_in: {'C': 1e-09}\n",
"0 3\n",
"params_in: {'C': 3.1622776601683795e-09}\n",
"0 4\n",
"params_in: {'C': 1e-08}\n",
"0 5\n",
"params_in: {'C': 3.162277660168379e-08}\n",
"0 6\n",
"params_in: {'C': 1e-07}\n",
"0 7\n",
"params_in: {'C': 3.162277660168379e-07}\n",
"0 8\n",
"params_in: {'C': 1e-06}\n",
"0 9\n",
"params_in: {'C': 3.162277660168379e-06}\n",
"0 10\n",
"params_in: {'C': 1e-05}\n",
"0 11\n",
"params_in: {'C': 3.1622776601683795e-05}\n",
"0 12\n",
"params_in: {'C': 0.0001}\n",
"0 13\n",
"params_in: {'C': 0.00031622776601683794}\n",
"0 14\n",
"params_in: {'C': 0.001}\n",
"0 15\n",
"params_in: {'C': 0.0031622776601683794}\n",
"0 16\n",
"params_in: {'C': 0.01}\n",
"0 17\n",
"params_in: {'C': 0.03162277660168379}\n",
"0 18\n",
"params_in: {'C': 0.1}\n",
"0 19\n",
"params_in: {'C': 0.31622776601683794}\n",
"0 20\n",
"params_in: {'C': 1.0}\n",
"0 21\n",
"params_in: {'C': 3.1622776601683795}\n",
"0 22\n",
"params_in: {'C': 10.0}\n",
"0 23\n",
"params_in: {'C': 31.622776601683793}\n",
"0 24\n",
"params_in: {'C': 100.0}\n",
"0 25\n",
"params_in: {'C': 316.22776601683796}\n",
"0 26\n",
"params_in: {'C': 1000.0}\n",
"0 27\n",
"params_in: {'C': 3162.2776601683795}\n",
"0 28\n",
"params_in: {'C': 10000.0}\n",
"0 29\n",
"params_in: {'C': 31622.776601683792}\n",
"0 30\n",
"params_in: {'C': 100000.0}\n",
"0 31\n",
"params_in: {'C': 316227.7660168379}\n",
"0 32\n",
"params_in: {'C': 1000000.0}\n",
"0 33\n",
"params_in: {'C': 3162277.6601683795}\n",
"0 34\n",
"params_in: {'C': 10000000.0}\n",
"0 35\n",
"params_in: {'C': 31622776.60168379}\n",
"0 36\n",
"params_in: {'C': 100000000.0}\n",
"0 37\n",
"params_in: {'C': 316227766.01683795}\n",
"0 38\n",
"params_in: {'C': 1000000000.0}\n",
"0 39\n",
"params_in: {'C': 3162277660.1683793}\n",
"0 40\n",
"params_in: {'C': 10000000000.0}\n",
"val_pref: [[0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n",
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n",
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n",
" 0.59285714 0.59285714 0.55952381 0.71666667 0.81666667 0.81666667\n",
" 0.83571429 0.86666667 0.9 0.9 0.9 0.9\n",
" 0.9 0.9 0.9 0.9 0.9 0.9\n",
" 0.9 0.9 0.9 0.9 0.9 ]]\n",
"test_pref: [[0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n",
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n",
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n",
" 0.28571429 0.28571429 0.61428571 0.84285714 0.84285714 0.85714286\n",
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n",
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n",
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286]]\n",
"cross validation: 100%|██████████| 30/30 [00:11<00:00, 2.75it/s]\n",
"\n",
"\n",
"4. Getting final performance...\n",
"val_pref: [0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n",
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n",
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n",
" 0.59285714 0.59285714 0.55952381 0.71666667 0.81666667 0.81666667\n",
" 0.83571429 0.86666667 0.9 0.9 0.9 0.9\n",
" 0.9 0.9 0.9 0.9 0.9 0.9\n",
" 0.9 0.9 0.9 0.9 0.9 ]\n",
"test_pref: [0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n",
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n",
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n",
" 0.28571429 0.28571429 0.61428571 0.84285714 0.84285714 0.85714286\n",
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n",
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n",
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286]\n",
"average_val_scores: [[0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n",
" 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n",
" 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n",
" 0.55301587 0.55468254 0.61507937 0.71777778 0.78039683 0.80531746\n",
" 0.86198413 0.89531746 0.89420635 0.87190476 0.85761905 0.85761905\n",
" 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905\n",
" 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905]]\n",
"best_val_perf: 0.8953174603174604\n",
"\n",
"best_params_out: [{'edge_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'n_jobs': 8}]\n",
"best_params_in: [{'C': 316.22776601683796}]\n",
"\n",
"best_val_perf: 0.8953174603174604\n",
"best_val_std: 0.029090007386146643\n",
"(array([0]), array([25]))\n",
"[0]\n",
"[[0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n",
" 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n",
" 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n",
" 0.5047619 0.49761905 0.66 0.75857143 0.78857143 0.82857143\n",
" 0.85285714 0.86380952 0.84428571 0.82190476 0.81571429 0.81571429\n",
" 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429\n",
" 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429]]\n",
"final_performance: [0.8638095238095236]\n",
"final_confidence: [0.10509426306201483]\n",
"train_performance: [0.9857934904601572]\n",
"train_std: [0.00730576290039335]\n",
"\n",
"time to calculate gram matrix with different hyper-params: 23.39±nans\n",
"time to calculate best gram matrix: 23.39±nans\n",
"total training time with all hyper-param choices: 34.88s\n",
"\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n",
" keepdims=keepdims)\n",
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n",
" ret = ret.dtype.type(ret / rcount)\n"
]
}
],
"source": [
"#!/usr/bin/env python3\n",
"# -*- coding: utf-8 -*-\n",
"\"\"\"\n",
"Created on Fri Sep 28 16:37:29 2018\n",
"\n",
"@author: ljia\n",
"\"\"\"\n",
"\n",
"import functools\n",
"from libs import *\n",
"import multiprocessing\n",
"\n",
"from pygraph.kernels.structuralspKernel import structuralspkernel\n",
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n",
"\n",
"dslist = [\n",
"# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n",
"# 'task': 'regression'}, # node symb\n",
"# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n",
"# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n",
"# # contains single node graph, node symb\n",
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n",
"# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n",
"# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n",
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n",
"# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n",
" # node nsymb\n",
"# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n",
"# # node symb/nsymb\n",
"# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n",
"# # node/edge symb\n",
"# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n",
"# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n",
"\n",
" # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n",
" # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n",
" # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n",
" # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n",
" #\n",
" # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n",
" # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n",
"# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb, missing values\n",
"# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb, missing values\n",
" # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n",
"\n",
" # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n",
" # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n",
" # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n",
" # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n",
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n",
" # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n",
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n",
" # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n",
" # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n",
"\n",
"# # not working below\n",
"# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n",
" # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n",
" # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n",
" # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n",
"]\n",
"estimator = structuralspkernel\n",
"mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n",
"param_grid_precomputed = {'node_kernels': \n",
" [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],\n",
" 'edge_kernels': \n",
" [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n",
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n",
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n",
"\n",
"for ds in dslist:\n",
" print()\n",
" print(ds['name'])\n",
" model_selection_for_precomputed_kernel(\n",
" ds['dataset'],\n",
" estimator,\n",
" param_grid_precomputed,\n",
" (param_grid[1] if ('task' in ds and ds['task']\n",
" == 'regression') else param_grid[0]),\n",
" (ds['task'] if 'task' in ds else 'classification'),\n",
" NUM_TRIALS=30,\n",
" datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n",
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n",
" ds_name=ds['name'],\n",
" n_jobs=multiprocessing.cpu_count(),\n",
" read_gm_from_file=False)\n",
" print()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
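
The '4. Getting final performance...' block in the log above follows a fixed selection rule: average the validation scores over the 30 outer trials, take the maximum (the minimum for regression), and break ties by the smallest validation standard deviation. A compact sketch of that rule on stand-in scores:

import numpy as np

NUM_TRIALS, n_out, n_in = 30, 2, 41  # trials x Gram params x C values
rng = np.random.default_rng(0)
val_pref = rng.random((NUM_TRIALS, n_out, n_in))  # stand-in CV scores

average_val_scores = val_pref.mean(axis=0)
std_val_scores = val_pref.std(axis=0, ddof=1)  # sample std, as in the code
best_val_perf = np.amax(average_val_scores)    # np.amin for regression
candidates = np.where(average_val_scores == best_val_perf)
best = min(zip(*candidates), key=lambda ij: std_val_scores[ij])
print('best (outer, inner) indices:', best, 'val perf: %.3f' % best_val_perf)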

+ 3
- 5
notebooks/run_untilhpathkernel.py View File

@@ -6,10 +6,8 @@ Created on Fri Oct 5 19:19:33 2018
@author: ljia
"""

-import functools
from libs import *
import multiprocessing
-from sklearn.metrics.pairwise import rbf_kernel

from pygraph.kernels.untilHPathKernel import untilhpathkernel
from pygraph.utils.kernels import deltakernel, kernelproduct
@@ -61,9 +59,9 @@ dslist = [
# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
estimator = untilhpathkernel
-mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
-param_grid_precomputed = {'depth': np.linspace(1, 10, 10),
-'k_func': ['tanimoto', 'MinMax']}
+param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
+'k_func': ['MinMax', 'tanimoto'],
+'compute_method': ['trie']} # ['MinMax']}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]
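
The added 'compute_method': ['trie'] stores the label paths extracted from each graph in a trie rather than a flat list, so paths sharing a prefix are kept only once. A minimal sketch of such a structure (the PathTrie class here is hypothetical; the trie shipped with untilHPathKernel may differ):

class PathTrie:
    """Store label sequences up to depth h with shared prefixes (sketch)."""
    def __init__(self):
        self.children = {}
        self.count = 0  # number of paths ending at this node

    def insert(self, path):
        node = self
        for label in path:
            node = node.children.setdefault(label, PathTrie())
        node.count += 1

trie = PathTrie()
for p in [('C', 'C', 'O'), ('C', 'C', 'N'), ('C', 'O')]:
    trie.insert(p)
# the prefix ('C', 'C') is stored once instead of twice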



+ 86
- 0
notebooks/run_vertex_differs_cw.py View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 17:57:18 2018

@author: ljia
"""
import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing

dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'task': 'regression'}, # node symb
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# # contains single node graph, node symb
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# # node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.commonWalkKernel import commonwalkkernel
estimator = commonwalkkernel
param_grid_precomputed = [{'compute_method': ['geo'],
'weight': np.linspace(0.01, 0.15, 15)},
# 'weight': np.logspace(-1, -10, num=10, base=10)},
{'compute_method': ['exp'], 'weight': range(0, 15)}]
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
vn_list = [nx.number_of_nodes(g) for g in Gn]
idx_sorted = np.argsort(vn_list)
vn_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
ave_vnb = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
avevn = np.mean(vn_list[len_1piece * piece:len_1piece * (piece + 1)])
ave_vnb.append(avevn)
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print('corresponding average vertex numbers are', ave_vnb)
print()
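
This script, like its run_vertex_differs_* siblings below, sorts the graphs by vertex count and times the kernel on five consecutive slices, so later pieces hold the larger graphs. Note that int(len(Gn) / 5) floors, so up to four of the largest graphs are silently dropped when the dataset size is not a multiple of five; for example, with MUTAG's 188 graphs:

Gn = list(range(188))          # stand-in for MUTAG's 188 graphs
len_1piece = int(len(Gn) / 5)  # 37
pieces = [Gn[len_1piece * i:len_1piece * (i + 1)] for i in range(5)]
print(sum(len(p) for p in pieces))  # 185: the 3 largest graphs are never timed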

+ 83
- 0
notebooks/run_vertex_differs_ma.py View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 15:16:17 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing

dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'task': 'regression'}, # node symb
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# # contains single node graph, node symb
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# # node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.marginalizedKernel import marginalizedkernel
estimator = marginalizedkernel
#param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.3, 3),
# 'n_iteration': np.linspace(1, 1, 1),
param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9),
'n_iteration': np.linspace(1, 19, 7),
'remove_totters': [False]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
vn_list = [nx.number_of_nodes(g) for g in Gn]
idx_sorted = np.argsort(vn_list)
vn_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print()
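
In the marginalized kernel a random walk halts at each step with probability p_quit, so walk lengths are geometrically distributed with mean (1 - p_quit) / p_quit; the grid from 0.1 to 0.9 thus sweeps expected walk lengths from 9 down to about 0.11. A quick sanity check of that mean:

import random

def walk_length(p_quit):
    length = 0
    while random.random() > p_quit:  # continue with probability 1 - p_quit
        length += 1
    return length

p = 0.1
samples = [walk_length(p) for _ in range(100000)]
print(sum(samples) / len(samples))  # ≈ (1 - p) / p = 9.0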

+ 108
- 0
notebooks/run_vertex_differs_rw.py View File

@@ -0,0 +1,108 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 15:22:35 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing
import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'task': 'regression'}, # node symb
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# # contains single node graph, node symb
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# # node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.randomWalkKernel import randomwalkkernel
estimator = randomwalkkernel
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]
ave_time = {}
std_time = {}
for compute_method in ['sylvester', 'conjugate', 'fp', 'spectral']:
if compute_method == 'sylvester':
param_grid_precomputed = {'compute_method': ['sylvester'],
# 'weight': np.linspace(0.01, 0.10, 10)}
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'conjugate':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['conjugate'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'fp':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['fp'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-3, -10, num=8, base=10)}
elif compute_method == 'spectral':
param_grid_precomputed = {'compute_method': ['spectral'],
'weight': np.logspace(-1, -10, num=10, base=10),
'sub_kernel': ['geo', 'exp']}
_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
ave_time[compute_method] = average_gram_matrix_time
std_time[compute_method] = std_gram_matrix_time
print()
return ave_time, std_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
vn_list = [nx.number_of_nodes(g) for g in Gn]
idx_sorted = np.argsort(vn_list)
vn_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print()
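
All four compute_method values target the same geometric random walk kernel on the direct product graph, roughly k(G, G') = q^T (I - lambda * W_x)^(-1) p in the formulation of Vishwanathan et al., where W_x is the (possibly label-kernel-weighted) adjacency matrix of the product graph. 'sylvester' and 'spectral' exploit matrix structure and take no label kernels, while 'conjugate' and 'fp' solve the linear system iteratively and accept arbitrary node and edge kernels, which is why only those two grids carry node_kernels and edge_kernels. The narrower weight range for 'fp' (1e-3 down to 1e-10) plausibly reflects that the fixed-point iteration converges only for small enough lambda. A toy sketch of that iteration (hypothetical matrices, not the pygraph implementation):

import numpy as np

# Fixed-point iteration for x = p + lam * W @ x; the Neumann series
# converges when the spectral radius of lam * W is below 1, hence the
# need for small weights.
rng = np.random.default_rng(0)
W = rng.random((4, 4))      # stand-in for the product-graph weight matrix
p = np.ones(4) / 4          # uniform starting distribution
lam = 1e-3
x = p.copy()
for _ in range(50):
    x = p + lam * W @ x
print(p @ x)  # ≈ p^T (I - lam * W)^(-1) p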

+ 83
- 0
notebooks/run_vertex_differs_sp.py View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 16:00:37 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import functools
import multiprocessing
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'task': 'regression'}, # node symb
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# # contains single node graph, node symb
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# # node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.spKernel import spkernel
estimator = spkernel
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels': [
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
vn_list = [nx.number_of_nodes(g) for g in Gn]
idx_sorted = np.argsort(vn_list)
vn_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print()

+ 85
- 0
notebooks/run_vertex_differs_ssp.py View File

@@ -0,0 +1,85 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 16:23:39 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import functools
import multiprocessing
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct

dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'task': 'regression'}, # node symb
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# # contains single node graph, node symb
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# # node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.structuralspKernel import structuralspkernel
estimator = structuralspkernel
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
vn_list = [nx.number_of_nodes(g) for g in Gn]
idx_sorted = np.argsort(vn_list)
vn_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
for piece in range(4, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print()
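
Note that, unlike the other run_vertex_differs_* scripts, this one times only the last slice (range(4, 5)), i.e. the fifth of ENZYMES containing the largest graphs, rather than sweeping all five pieces.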

+ 80
- 0
notebooks/run_vertex_differs_uhp.py View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 8 16:25:33 2019

@author: ljia
"""

import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.model_selection_precomputed import compute_gram_matrices
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing

dslist = [
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
# 'task': 'regression'}, # node symb
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
# # contains single node graph, node symb
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# # node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]

def run_ms(dataset, y, ds):
from pygraph.kernels.untilHPathKernel import untilhpathkernel
estimator = untilhpathkernel
param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
'k_func': ['MinMax', 'tanimoto']} # ['MinMax']}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

_, gram_matrix_time, _, _, _ = compute_gram_matrices(
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
'../notebooks/results/' + estimator.__name__, ds['name'],
n_jobs=multiprocessing.cpu_count(), verbose=False)
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print()
return average_gram_matrix_time, std_gram_matrix_time


for ds in dslist:
print()
print(ds['name'])
Gn, y_all = loadDataset(
ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
vn_list = [nx.number_of_nodes(g) for g in Gn]
idx_sorted = np.argsort(vn_list)
vn_list.sort()
Gn = [Gn[idx] for idx in idx_sorted]
y_all = [y_all[idx] for idx in idx_sorted]
len_1piece = int(len(Gn) / 5)
ave_time = []
std_time = []
for piece in range(0, 5):
print('piece', str(piece), ':')
Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
avet, stdt = run_ms(Gn_p, y_all_p, ds)
ave_time.append(avet)
std_time.append(stdt)
print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
'\nthe time std is \n', std_time)
print()

+ 47
- 0
notebooks/test_mpi.py View File

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test the Message Passing Interface (MPI) for cluster parallelization.
Created on Wed Nov 7 17:26:40 2018

@author: ljia
"""

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

import numpy as np
import time
size = comm.Get_size()
numDataPerRank = 10
data = None
if rank == 0:
data = np.linspace(1, size * numDataPerRank, size * numDataPerRank)
recvbuf = np.empty(numDataPerRank, dtype='d')
comm.Scatter(data, recvbuf, root=0)
recvbuf += 1
print('Rank: ', rank, ', recvbuf received: ', recvbuf, ', size: ', size, ', time: ', time.time())

#if rank == 0:
# data = {'key1' : [1,2, 3],
# 'key2' : ( 'abc', 'xyz')}
#else:
# data = None
#
#data = comm.bcast(data, root=0)
#print('Rank: ',rank,', data: ' ,data)

#if rank == 0:
# data = {'a': 7, 'b': 3.14}
# comm.send(data, dest=1)
#elif rank == 1:
# data = comm.recv(source=0)
# print('On process 1, data is ', data)

#print('My rank is ', rank)

#for i in range(0, 100000000):
# print(i)
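
A sketch of how such a script is typically launched (assuming a working MPI installation with mpi4py):

mpiexec -n 4 python3 test_mpi.py
# rank 0 builds the 40-element array; Scatter hands each of the 4 ranks
# 10 consecutive values, and every rank prints its recvbuf incremented by 1.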

+ 482
- 484
notebooks/test_parallel.py
File diff suppressed because it is too large
View File


+ 189
- 410
notebooks/test_parallel/myria/0.eps
File diff suppressed: regenerated EPS plot (raw PostScript). Recoverable changes: %%CreationDate Wed Oct 3 16:34:31 2018 → Mon Oct 8 06:59:56 2018; legend label "Acyclic" replaced by "ENZYMES"; axis ticks and curve coordinates updated.
View File


+ 2752
- 0
notebooks/test_parallel/myria/28cpus/output_parallel28.txt
File diff suppressed because it is too large
View File


+ 0
- 2092
notebooks/test_parallel/myria/6.eps
File diff suppressed because it is too large
View File


BIN
notebooks/test_parallel/myria/structuralspkernel.Acyclic.npy View File


BIN
notebooks/test_parallel/myria/structuralspkernel.Alkane.npy View File


BIN
notebooks/test_parallel/myria/structuralspkernel.MAO.npy View File


BIN
notebooks/test_parallel/myria/structuralspkernel.MUTAG.npy View File


BIN
notebooks/test_parallel/myria/structuralspkernel.PAH.npy View File


+ 48
- 48
notebooks/test_parallel/myria/structuralspkernel0.eps
File diff suppressed: regenerated EPS plot (raw PostScript). Recoverable changes: %%CreationDate Mon Oct 8 11:58:19 2018 → Tue Oct 9 12:05:57 2018; axis ticks and the plotted curve coordinates updated.
View File


+ 88
- 88
notebooks/test_parallel/myria/structuralspkernel1.eps
File diff suppressed: regenerated EPS plot (raw PostScript). Recoverable changes: %%CreationDate Mon Oct 8 12:01:23 2018 → Tue Oct 9 12:09:07 2018; axis ticks and the two plotted curves' coordinates updated.
View File


+ 156
- 136
notebooks/test_parallel/myria/structuralspkernel2.eps
File diff suppressed: regenerated EPS plot (raw PostScript). Recoverable changes: %%CreationDate Mon Oct 8 12:01:43 2018 → Tue Oct 9 12:09:26 2018; axis ticks and the three plotted curves' coordinates updated.
View File


+ 186
- 186
notebooks/test_parallel/myria/structuralspkernel3.eps View File

@@ -1,7 +1,7 @@
%!PS-Adobe-3.0 EPSF-3.0
%%Title: test_parallel/structuralspkernel3.eps
%%Creator: matplotlib version 2.2.3, http://matplotlib.org/
%%CreationDate: Mon Oct 8 12:20:54 2018
%%CreationDate: Tue Oct 9 12:30:09 2018
%%Orientation: portrait
%%BoundingBox: 75 223 536 568
%%EndComments
(remaining plot-coordinate hunks suppressed: matplotlib re-rendered this EPS figure on Oct 9 (see the %%CreationDate change above), so only axis-tick positions and the line/marker coordinates of the plotted curves shifted; no other content changed)


+ 226
- 226
notebooks/test_parallel/myria/structuralspkernel4.eps View File

@@ -1,7 +1,7 @@
%!PS-Adobe-3.0 EPSF-3.0
%%Title: test_parallel/structuralspkernel4.eps
%%Creator: matplotlib version 2.2.3, http://matplotlib.org/
%%CreationDate: Mon Oct 8 12:25:50 2018
%%CreationDate: Tue Oct 9 12:35:19 2018
%%Orientation: portrait
%%BoundingBox: 75 223 536 568
%%EndComments
(remaining plot-coordinate hunks suppressed: same re-render as above; only axis-tick positions and the line/marker coordinates of the plotted curves changed)


+ 0
- 2100
notebooks/test_parallel/myria/structuralspkernel5.eps
File diff suppressed because it is too large
View File


+ 0
- 1
pygraph/kernels/.##untildPathKernel.py# View File

@@ -1 +0,0 @@
ljia@ljia-Precision-7520.5692:1516782025

+ 72
- 48
pygraph/kernels/commonWalkKernel.py View File

@@ -8,12 +8,8 @@

import sys
import time
from tqdm import tqdm
from collections import Counter
from itertools import combinations_with_replacement
from functools import partial
from multiprocessing import Pool
#import traceback

import networkx as nx
import numpy as np
@@ -21,6 +17,7 @@ import numpy as np
sys.path.insert(0, "../")
from pygraph.utils.utils import direct_product
from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.parallel import parallel_gm


def commonwalkkernel(*args,
@@ -67,7 +64,16 @@ def commonwalkkernel(*args,
compute_method = compute_method.lower()
# arrange all graphs in a list
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
Kmatrix = np.zeros((len(Gn), len(Gn)))
# remove graphs with only 1 node, as they do not have adjacency matrices
len_gn = len(Gn)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 1]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
if len(Gn) != len_gn:
print('\n %d graphs are removed as they have only 1 node.\n' %
(len_gn - len(Gn)))
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
@@ -82,50 +88,66 @@ def commonwalkkernel(*args,
Gn = [G.to_directed() for G in Gn]

start_time = time.time()
Kmatrix = np.zeros((len(Gn), len(Gn)))

# ---- use pool.imap_unordered to parallel and track progress. ----
pool = Pool(n_jobs)
itr = zip(combinations_with_replacement(Gn, 2),
combinations_with_replacement(range(0, len(Gn)), 2))
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
if len_itr < 1000 * n_jobs:
chunksize = int(len_itr / n_jobs) + 1
else:
chunksize = 1000

def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
# direct product graph method - exponential
if compute_method == 'exp':
do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight)
# direct product graph method - geometric
elif compute_method == 'geo':
do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)

for i, j, kernel in tqdm(
pool.imap_unordered(do_partial, itr, chunksize),
desc='calculating kernels',
file=sys.stdout):
Kmatrix[i][j] = kernel
Kmatrix[j][i] = kernel
pool.close()
pool.join()
do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)
# pool = Pool(n_jobs)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
# if len_itr < 1000 * n_jobs:
# chunksize = int(len_itr / n_jobs) + 1
# else:
# chunksize = 1000
#
# # direct product graph method - exponential
# if compute_method == 'exp':
# do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight)
# # direct product graph method - geometric
# elif compute_method == 'geo':
# do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)
#
# for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, chunksize),
# desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()


# # ---- direct running, normally use single CPU core. ----
# # direct product graph method - exponential
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# if compute_method == 'exp':
# for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# i, j, Kmatrix[i][j] = _commonwalkkernel_exp(Gn, node_label,
# edge_label, weight, gs)
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label,
# edge_label, weight)
# Kmatrix[j][i] = Kmatrix[i][j]
#
# # direct product graph method - geometric
# elif compute_method == 'geo':
# for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# i, j, Kmatrix[i][j] = _commonwalkkernel_geo(Gn, node_label,
# edge_label, weight, gs)
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label,
# edge_label, weight)
# Kmatrix[j][i] = Kmatrix[i][j]
#


# # search all walks using brute force.
# elif compute_method == 'brute':
# n = int(n)
@@ -149,7 +171,7 @@ def commonwalkkernel(*args,
"\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time
return Kmatrix, run_time, idx


def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):
@@ -177,6 +199,9 @@ def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):

# get tensor product / direct product
gp = direct_product(g1, g2, node_label, edge_label)
# return 0 if the direct product graph has no more than 1 node.
if nx.number_of_nodes(gp) < 2:
return 0
A = nx.adjacency_matrix(gp).todense()
# print(A)

@@ -217,12 +242,10 @@ def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):
return exp_D.sum()


def wrapper_cw_exp(node_label, edge_label, beta, itr_item):
g1 = itr_item[0][0]
g2 = itr_item[0][1]
i = itr_item[1][0]
j = itr_item[1][1]
return i, j, _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta)
def wrapper_cw_exp(node_label, edge_label, beta, itr):
i = itr[0]
j = itr[1]
return i, j, _commonwalkkernel_exp(G_gn[i], G_gn[j], node_label, edge_label, beta)


def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma):
@@ -249,20 +272,21 @@ def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma):
"""
# get tensor product / direct product
gp = direct_product(g1, g2, node_label, edge_label)
# return 0 if the direct product graph has no more than 1 node.
if nx.number_of_nodes(gp) < 2:
return 0
A = nx.adjacency_matrix(gp).todense()
mat = np.identity(len(A)) - gamma * A
try:
return mat.I.sum()
except np.linalg.LinAlgError:
return np.nan
# try:
return mat.I.sum()
# except np.linalg.LinAlgError:
# return np.nan
def wrapper_cw_geo(node_label, edge_label, gama, itr_item):
g1 = itr_item[0][0]
g2 = itr_item[0][1]
i = itr_item[1][0]
j = itr_item[1][1]
return i, j, _commonwalkkernel_geo(g1, g2, node_label, edge_label, gama)
def wrapper_cw_geo(node_label, edge_label, gamma, itr):
i = itr[0]
j = itr[1]
return i, j, _commonwalkkernel_geo(G_gn[i], G_gn[j], node_label, edge_label, gamma)


def _commonwalkkernel_brute(walks1,
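The init_worker + parallel_gm pattern above hands the graph list to each worker once through the Pool initializer and binds it to a module-level global, so each task only carries a pair of indices instead of two pickled graphs. A minimal, self-contained sketch of that pattern follows; the names and the toy pair function are illustrative, not the actual parallel_gm from pygraph.utils.parallel.

import sys
from itertools import combinations_with_replacement
from multiprocessing import Pool

import numpy as np
from tqdm import tqdm


def _init_worker(gn_toshare):
    # Runs once in each worker process: bind the big read-only list to a
    # module-level global. With fork it is inherited copy-on-write; with
    # spawn it is pickled once per worker, never once per task.
    global G_gn
    G_gn = gn_toshare


def _pair_kernel(ij):
    # Toy symmetric pair function standing in for a real graph kernel.
    i, j = ij
    return i, j, float(len(G_gn[i]) == len(G_gn[j]))


def parallel_gm_sketch(Gn, n_jobs=4, chunksize=100):
    Kmatrix = np.zeros((len(Gn), len(Gn)))
    itr = combinations_with_replacement(range(len(Gn)), 2)
    with Pool(processes=n_jobs, initializer=_init_worker,
              initargs=(Gn,)) as pool:
        for i, j, kernel in tqdm(
                pool.imap_unordered(_pair_kernel, itr, chunksize),
                desc='calculating kernels', file=sys.stdout):
            Kmatrix[i][j] = kernel
            Kmatrix[j][i] = kernel
    return Kmatrix

imap_unordered keeps workers busy regardless of which pairs finish first; the symmetric writes restore the ordering in the Gram matrix.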


+ 140
- 82
pygraph/kernels/marginalizedKernel.py View File

@@ -12,12 +12,11 @@

import sys
import time
from itertools import combinations_with_replacement
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm
tqdm.monitor_interval = 0
import traceback
#import traceback

import networkx as nx
import numpy as np
@@ -25,6 +24,7 @@ import numpy as np
from pygraph.utils.kernels import deltakernel
from pygraph.utils.utils import untotterTransformation
from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.parallel import parallel_gm
sys.path.insert(0, "../")


@@ -64,6 +64,7 @@ def marginalizedkernel(*args,
# pre-process
n_iteration = int(n_iteration)
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
@@ -76,16 +77,15 @@ def marginalizedkernel(*args,
nx.set_edge_attributes(G, '0', 'bond_type')

start_time = time.time()

if remove_totters:
# ---- use pool.imap_unordered to parallel and track progress. ----
pool = Pool(n_jobs)
untotter_partial = partial(wrap_untotter, Gn, node_label, edge_label)
if len(Gn) < 1000 * n_jobs:
untotter_partial = partial(wrapper_untotter, Gn, node_label, edge_label)
if len(Gn) < 100 * n_jobs:
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 1000
chunksize = 100
for i, g in tqdm(
pool.imap_unordered(
untotter_partial, range(0, len(Gn)), chunksize),
@@ -104,23 +104,13 @@ def marginalizedkernel(*args,
Kmatrix = np.zeros((len(Gn), len(Gn)))

# ---- use pool.imap_unordered to parallel and track progress. ----
pool = Pool(n_jobs)
do_partial = partial(_marginalizedkernel_do, Gn, node_label, edge_label,
p_quit, n_iteration)
itr = combinations_with_replacement(range(0, len(Gn)), 2)
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
if len_itr < 1000 * n_jobs:
chunksize = int(len_itr / n_jobs) + 1
else:
chunksize = 1000
for i, j, kernel in tqdm(
pool.imap_unordered(do_partial, itr, chunksize),
desc='calculating kernels',
file=sys.stdout):
Kmatrix[i][j] = kernel
Kmatrix[j][i] = kernel
pool.close()
pool.join()
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_marg_do, node_label, edge_label,
p_quit, n_iteration)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)


# # ---- direct running, normally use single CPU core. ----
@@ -130,6 +120,7 @@ def marginalizedkernel(*args,
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# print(i, j)
# Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], node_label,
# edge_label, p_quit, n_iteration)
# Kmatrix[j][i] = Kmatrix[i][j]
@@ -143,7 +134,7 @@ def marginalizedkernel(*args,
return Kmatrix, run_time


def _marginalizedkernel_do(Gn, node_label, edge_label, p_quit, n_iteration, ij):
def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
"""Calculate marginalized graph kernel between 2 graphs.

Parameters
@@ -164,69 +155,136 @@ def _marginalizedkernel_do(Gn, node_label, edge_label, p_quit, n_iteration, ij):
kernel : float
Marginalized Kernel between 2 graphs.
"""
try:
# init parameters
iglobal = ij[0]
jglobal = ij[1]
g1 = Gn[iglobal]
g2 = Gn[jglobal]
kernel = 0
num_nodes_G1 = nx.number_of_nodes(g1)
num_nodes_G2 = nx.number_of_nodes(g2)
# the initial probability distribution in the random walks generating step
# (uniform distribution over |G|)
p_init_G1 = 1 / num_nodes_G1
p_init_G2 = 1 / num_nodes_G2
q = p_quit * p_quit
r1 = q
# initial R_inf
# matrix to save all the R_inf for all pairs of nodes
R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
# init parameters
kernel = 0
num_nodes_G1 = nx.number_of_nodes(g1)
num_nodes_G2 = nx.number_of_nodes(g2)
# the initial probability distribution in the random walks generating step
# (uniform distribution over |G|)
p_init_G1 = 1 / num_nodes_G1
p_init_G2 = 1 / num_nodes_G2

q = p_quit * p_quit
r1 = q

# # initial R_inf
# # matrix to save all the R_inf for all pairs of nodes
# R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
#
# # calculate R_inf with a simple iterative method
# for i in range(1, n_iteration):
# R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
# R_inf_new.fill(r1)
#
# # calculate R_inf for each pair of nodes
# for node1 in g1.nodes(data=True):
# neighbor_n1 = g1[node1[0]]
# # the transition probability distribution in the random walks
# # generating step (uniform distribution over the vertices adjacent
# # to the current vertex)
# if len(neighbor_n1) > 0:
# p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
# for node2 in g2.nodes(data=True):
# neighbor_n2 = g2[node2[0]]
# if len(neighbor_n2) > 0:
# p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
#
# for neighbor1 in neighbor_n1:
# for neighbor2 in neighbor_n2:
# t = p_trans_n1 * p_trans_n2 * \
# deltakernel(g1.node[neighbor1][node_label],
# g2.node[neighbor2][node_label]) * \
# deltakernel(
# neighbor_n1[neighbor1][edge_label],
# neighbor_n2[neighbor2][edge_label])
#
# R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
# neighbor2] # ref [1] equation (8)
# R_inf[:] = R_inf_new
#
# # add elements of R_inf up and calculate kernel
# for node1 in g1.nodes(data=True):
# for node2 in g2.nodes(data=True):
# s = p_init_G1 * p_init_G2 * deltakernel(
# node1[1][node_label], node2[1][node_label])
# kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
# calculate R_inf with a simple iterative method
for i in range(1, n_iteration):
R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
R_inf_new.fill(r1)
# calculate R_inf for each pair of nodes
for node1 in g1.nodes(data=True):
neighbor_n1 = g1[node1[0]]
# the transition probability distribution in the random walks
# generating step (uniform distribution over the vertices adjacent
# to the current vertex)
R_inf = {} # dict to save all the R_inf for all pairs of nodes
# initial R_inf, the 1st iteration.
for node1 in g1.nodes(data=True):
for node2 in g2.nodes(data=True):
# R_inf[(node1[0], node2[0])] = r1
if len(g1[node1[0]]) > 0:
if len(g2[node2[0]]) > 0:
R_inf[(node1[0], node2[0])] = r1
else:
R_inf[(node1[0], node2[0])] = p_quit
else:
if len(g2[node2[0]]) > 0:
R_inf[(node1[0], node2[0])] = p_quit
else:
R_inf[(node1[0], node2[0])] = 1
# compute all transition probability first.
t_dict = {}
if n_iteration > 1:
for node1 in g1.nodes(data=True):
neighbor_n1 = g1[node1[0]]
# the transition probability distribution in the random walks
# generating step (uniform distribution over the vertices adjacent
# to the current vertex)
if len(neighbor_n1) > 0:
p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
for node2 in g2.nodes(data=True):
neighbor_n2 = g2[node2[0]]
p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
for neighbor1 in neighbor_n1:
for neighbor2 in neighbor_n2:
t = p_trans_n1 * p_trans_n2 * \
deltakernel(g1.node[neighbor1][node_label],
g2.node[neighbor2][node_label]) * \
deltakernel(
neighbor_n1[neighbor1][edge_label],
neighbor_n2[neighbor2][edge_label])
R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
neighbor2] # ref [1] equation (8)
R_inf[:] = R_inf_new
# add elements of R_inf up and calculate kernel
if len(neighbor_n2) > 0:
p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
for neighbor1 in neighbor_n1:
for neighbor2 in neighbor_n2:
t_dict[(node1[0], node2[0], neighbor1, neighbor2)] = \
p_trans_n1 * p_trans_n2 * \
deltakernel(g1.node[neighbor1][node_label],
g2.node[neighbor2][node_label]) * \
deltakernel(
neighbor_n1[neighbor1][edge_label],
neighbor_n2[neighbor2][edge_label])

# calculate R_inf with a simple iterative method
for i in range(2, n_iteration + 1):
R_inf_old = R_inf.copy()

# calculate R_inf for each pair of nodes
for node1 in g1.nodes(data=True):
for node2 in g2.nodes(data=True):
s = p_init_G1 * p_init_G2 * deltakernel(
node1[1][node_label], node2[1][node_label])
kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
return iglobal, jglobal, kernel
except Exception as e:
traceback.print_exc()
print('')
raise e
neighbor_n1 = g1[node1[0]]
# the transition probability distribution in the random walks
# generating step (uniform distribution over the vertices adjacent
# to the current vertex)
if len(neighbor_n1) > 0:
for node2 in g2.nodes(data=True):
neighbor_n2 = g2[node2[0]]
if len(neighbor_n2) > 0:
R_inf[(node1[0], node2[0])] = r1
for neighbor1 in neighbor_n1:
for neighbor2 in neighbor_n2:
R_inf[(node1[0], node2[0])] += \
(t_dict[(node1[0], node2[0], neighbor1, neighbor2)] * \
R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8)

# add elements of R_inf up and calculate kernel
for (n1, n2), value in R_inf.items():
s = p_init_G1 * p_init_G2 * deltakernel(
g1.nodes[n1][node_label], g2.nodes[n2][node_label])
kernel += s * value # ref [1] equation (6)

return kernel
def wrapper_marg_do(node_label, edge_label, p_quit, n_iteration, itr):
i = itr[0]
j = itr[1]
return i, j, _marginalizedkernel_do(G_gn[i], G_gn[j], node_label, edge_label, p_quit, n_iteration)

def wrap_untotter(Gn, node_label, edge_label, i):
return i, untotterTransformation(Gn[i], node_label, edge_label)
def wrapper_untotter(Gn, node_label, edge_label, i):
return i, untotterTransformation(Gn[i], node_label, edge_label)
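Spelled out, the recursion the rewritten _marginalizedkernel_do implements (a direct transcription of the code above; the boundary cases where a vertex has no neighbors are the explicit branches of the init loop) is:

R_1(v_1, v_2) = p_{\mathrm{quit}}^2,

R_k(v_1, v_2) = p_{\mathrm{quit}}^2 + \sum_{u_1 \in N(v_1)} \sum_{u_2 \in N(v_2)}
\frac{(1 - p_{\mathrm{quit}})^2}{|N(v_1)|\,|N(v_2)|}\,
\delta\big(\ell(u_1), \ell(u_2)\big)\,
\delta\big(\ell(v_1 u_1), \ell(v_2 u_2)\big)\, R_{k-1}(u_1, u_2)
\quad \text{(ref [1], eq. (8))},

k(G_1, G_2) = \frac{1}{|V_1|\,|V_2|} \sum_{v_1 \in V_1} \sum_{v_2 \in V_2}
\delta\big(\ell(v_1), \ell(v_2)\big)\, R_n(v_1, v_2)
\quad \text{(ref [1], eq. (6))}.

The t_dict pass caches the whole summand coefficient (two transition probabilities times two delta kernels) once per (v_1, v_2, u_1, u_2) tuple, so each of the n_iteration sweeps becomes a table lookup; that, together with the switch from a dense R_inf matrix to a dict, is where the rewrite saves its time.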

+ 641
- 126
pygraph/kernels/randomWalkKernel.py View File

@@ -4,27 +4,35 @@
"""

import sys
import pathlib
sys.path.insert(0, "../")
import time
from functools import partial
from tqdm import tqdm
# from collections import Counter

import networkx as nx
import numpy as np
from scipy.sparse import identity, kron
from scipy.sparse.linalg import cg
from scipy.optimize import fixed_point

from pygraph.utils.graphdataset import get_dataset_attributes

from pygraph.utils.parallel import parallel_gm

def randomwalkkernel(*args,
# params for all method.
compute_method=None,
weight=1,
p=None,
q=None,
edge_weight=None,
# params for conjugate and fp method.
node_kernels=None,
edge_kernels=None,
node_label='atom',
edge_label='bond_type',
edge_weight=None,
h=10,
p=None,
q=None,
weight=None,
compute_method=''):
# params for spectral method.
sub_kernel=None,
n_jobs=None):
"""Calculate random walk graph kernels.
Parameters
----------
@@ -48,7 +56,6 @@ def randomwalkkernel(*args,
Kernel matrix, each element of which is the path kernel up to d between 2 praphs.
"""
compute_method = compute_method.lower()
h = int(h)
Gn = args[0] if len(args) == 1 else [args[0], args[1]]

eweight = None
@@ -71,91 +78,68 @@ def randomwalkkernel(*args,

ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
'edge_attr_dim', 'is_directed'],
node_label=node_label,
edge_label=edge_label)
if not ds_attrs['node_labeled']:
for G in Gn:
nx.set_node_attributes(G, '0', 'atom')
if not ds_attrs['edge_labeled']:
for G in Gn:
nx.set_edge_attributes(G, '0', 'bond_type')
# remove graphs with no edges, as no walk can be found in their structures,
# so the weight matrix between such a graph and itself might be zero.
len_gn = len(Gn)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
if len(Gn) != len_gn:
print('\n %d graphs are removed as they don\'t contain edges.\n' %
(len_gn - len(Gn)))

start_time = time.time()

# # get all walks of all graphs before calculating kernels to save time; this may cost a lot of memory for large datasets.
# all_walks = [
# find_all_walks_until_length(
# Gn[i],
# n,
# node_label=node_label,
# edge_label=edge_label,
# labeled=labeled) for i in range(0, len(Gn))
# ]
# # get vertex and edge concatenated labels for each graph
# label_list, d = getLabels(Gn, node_label, edge_label, ds_attrs['is_directed'])
# gmf = filterGramMatrix(A_wave_list[0], label_list[0], ('C', '0', 'O'), ds_attrs['is_directed'])

if compute_method == 'sylvester':
import warnings
warnings.warn(
'The Sylvester equation (rather than the generalized Sylvester equation) is used; the number of edge labels has to be smaller than 3.'
)
Kmatrix = _randomwalkkernel_sylvester(Gn, weight, p, q, node_label,
edge_label, eweight)
warnings.warn('All labels are ignored.')
Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs)

elif compute_method == 'conjugate':
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_conjugate(
Gn[i], Gn[j], node_label, edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs,
node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs)
elif compute_method == 'fp':
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_fp(Gn[i], Gn[j], node_label,
edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
Kmatrix = _fixed_point(Gn, weight, p, q, ds_attrs, node_kernels,
edge_kernels, node_label, edge_label,
eweight, n_jobs)

elif compute_method == 'spectral':
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_spectral(
Gn[i], Gn[j], node_label, edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
import warnings
warnings.warn('All labels are ignored. Only works for undirected graphs.')
Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs)

elif compute_method == 'kron':
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j],
node_label, edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
else:
raise Exception(
'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
)

# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# Kmatrix[i][j] = _randomwalkkernel_do(
# all_walks[i],
# all_walks[j],
# node_label=node_label,
# edge_label=edge_label,
# labeled=labeled)
# Kmatrix[j][i] = Kmatrix[i][j]

run_time = time.time() - start_time
print(
"\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time
return Kmatrix, run_time, idx


def _randomwalkkernel_sylvester(Gn, lmda, p, q, node_label, edge_label,
eweight):
###############################################################################
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method.

Parameters
@@ -172,51 +156,77 @@ def _randomwalkkernel_sylvester(Gn, lmda, p, q, node_label, edge_label,
kernel : float
Kernel between 2 graphs.
"""
from control import dlyap
Kmatrix = np.zeros((len(Gn), len(Gn)))

if q == None:
# don't normalize adjacency matrices if q is a uniform vector.
A_list = [
nx.adjacency_matrix(G, eweight).todense() for G in tqdm(
# don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_list = [
nx.adjacency_matrix(G, eweight).todense().transpose() for G in tqdm(
Gn, desc='compute adjacency matrices', file=sys.stdout)
]
if p == None:
pbar = tqdm(
total=(1 + len(Gn)) * len(Gn) / 2,
desc='calculating kernels',
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
A = lmda * A_list[j]
Q = A_list[i]
# use uniform distribution if there is no prior knowledge.
nb_pd = len(A_list[i]) * len(A_list[j])
pd_uni = 1 / nb_pd
C = np.full((len(A_list[j]), len(A_list[i])), pd_uni)
try:
X = dlyap(A, Q, C)
X = np.reshape(X, (-1, 1), order='F')
# use uniform distribution if there is no prior knowledge.
q_direct = np.full((1, nb_pd), pd_uni)
Kmatrix[i][j] = np.dot(q_direct, X)
except TypeError:
# print('sth wrong.')
Kmatrix[i][j] = np.nan

Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
# A_list = []
# for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
# A_tilde = nx.adjacency_matrix(G, weight=None).todense()
# # normalized adjacency matrices
# # A_list.append(A_tilde / A_tilde.sum(axis=0))
# A_list.append(A_tilde)
# # normalized adjacency matrices
# A_wave_list = []
# for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
# A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose()
# norm = A_tilde.sum(axis=0)
# norm[norm == 0] = 1
# A_wave_list.append(A_tilde / norm)
if p == None: # p is uniform distribution as default.
def init_worker(Awl_toshare):
global G_Awl
G_Awl = Awl_toshare
do_partial = partial(wrapper_se_do, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(A_wave_list,), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# S = lmda * A_wave_list[j]
# T_t = A_wave_list[i]
# # use uniform distribution if there is no prior knowledge.
# nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
# p_times_uni = 1 / nb_pd
# M0 = np.full((len(A_wave_list[j]), len(A_wave_list[i])), p_times_uni)
# X = dlyap(S, T_t, M0)
# X = np.reshape(X, (-1, 1), order='F')
# # use uniform distribution if there is no prior knowledge.
# q_times = np.full((1, nb_pd), p_times_uni)
# Kmatrix[i][j] = np.dot(q_times, X)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)

return Kmatrix


def _randomwalkkernel_conjugate(G1, G2, node_label, edge_label):
def wrapper_se_do(lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _se_do(G_Awl[i], G_Awl[j], lmda)


def _se_do(A_wave1, A_wave2, lmda):
from control import dlyap
S = lmda * A_wave2
T_t = A_wave1
# use uniform distribution if there is no prior knowledge.
nb_pd = len(A_wave1) * len(A_wave2)
p_times_uni = 1 / nb_pd
M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni)
X = dlyap(S, T_t, M0)
X = np.reshape(X, (-1, 1), order='F')
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, nb_pd), p_times_uni)
return np.dot(q_times, X)
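Why a discrete Sylvester solver computes this kernel, as a standard derivation sketch (the exact transpose convention depends on control.dlyap's argument order, hence the hedge): for uniform p and q the geometric random-walk kernel is one linear solve on the direct-product graph, and the vec-trick identity folds that (n_1 n_2)-dimensional system into an n_1-by-n_2 matrix equation:

k(G_1, G_2) = q_\times^\top (I - \lambda W_\times)^{-1} p_\times, \qquad W_\times = A_1 \otimes A_2,

\operatorname{vec}(A_2 X A_1^\top) = (A_1 \otimes A_2)\operatorname{vec}(X)
\;\Longrightarrow\;
(I - \lambda W_\times)\operatorname{vec}(X) = \operatorname{vec}(M_0)
\iff X - \lambda\, A_2 X A_1^\top = M_0.

Solving the matrix equation is cubic in n_1 and n_2 separately, instead of cubic in n_1 n_2 for the naive inverse, which is where the Sylvester method gets its speed.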


###############################################################################
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method.

Parameters
@@ -233,21 +243,105 @@ def _randomwalkkernel_conjugate(G1, G2, node_label, edge_label):
kernel : float
Kernel between 2 graphs.
"""

dpg = nx.tensor_product(G1, G2) # direct product graph
import matplotlib.pyplot as plt
nx.draw_networkx(G1)
plt.show()
nx.draw_networkx(G2)
plt.show()
nx.draw_networkx(dpg)
plt.show()
X = dlyap(A, Q, C)

return kernel
Kmatrix = np.zeros((len(Gn), len(Gn)))
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1:
# # this is faster for unlabeled graphs. @todo: why?
# if q == None:
# # don't normalize adjacency matrices if q is a uniform vector. Note
# # A_wave_list actually contains the transposes of the adjacency matrices.
# A_wave_list = [
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
# ]
# if p == None: # p is uniform distribution as default.
# def init_worker(Awl_toshare):
# global G_Awl
# G_Awl = Awl_toshare
# do_partial = partial(wrapper_cg_unlabled_do, lmda)
# parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
# glbv=(A_wave_list,), n_jobs=n_jobs)
# else:
# reindex nodes using consecutive integers for convenience of kernel calculation.
Gn = [nx.convert_node_labels_to_integers(
g, first_label=0, label_attribute='label_orignal') for g in tqdm(
Gn, desc='reindex vertices', file=sys.stdout)]
if p == None and q == None: # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# result = _cg_labled_do(Gn[i], Gn[j], ds_attrs, node_kernels,
# node_label, edge_kernels, edge_label, lmda)
# Kmatrix[i][j] = result
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
return Kmatrix


def _randomwalkkernel_fp(G1, G2, node_label, edge_label):
def wrapper_cg_unlabled_do(lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _cg_unlabled_do(G_Awl[i], G_Awl[j], lmda)


def _cg_unlabled_do(A_wave1, A_wave2, lmda):
nb_pd = len(A_wave1) * len(A_wave2)
p_times_uni = 1 / nb_pd
w_times = kron(A_wave1, A_wave2).todense()
A = identity(w_times.shape[0]) - w_times * lmda
b = np.full((nb_pd, 1), p_times_uni)
x, _ = cg(A, b)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, nb_pd), p_times_uni)
return np.dot(q_times, x)


def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
edge_label, lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)


def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
edge_kernels, edge_label, lmda):
# First, compute kernels between all pairs of nodes, a method borrowed
# from FCSP. It is faster than computing all edge kernels directly
# when $d_1d_2>2$, where $d_1$ and $d_2$ are the vertex degrees of the
# graphs compared, which covers most of the cases we encountered (see
# the cost note after this function). For very sparse graphs, this
# would be slow.
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
# Compute weight matrix of the direct product graph.
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
edge_kernels, edge_label)
# use uniform distribution if there is no prior knowledge.
p_times_uni = 1 / w_dim
A = identity(w_times.shape[0]) - w_times * lmda
b = np.full((w_dim, 1), p_times_uni)
x, _ = cg(A, b)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, w_dim), p_times_uni)
return np.dot(q_times, x)
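The FCSP remark in the comments above can be made quantitative (a back-of-the-envelope count; d_1, d_2 are average vertex degrees, so m_i = n_i d_i / 2 edges): filling w_times needs two vertex-kernel values per edge pair, while precomputing vk_dict needs n_1 n_2 evaluations, so precomputation wins exactly when

2\, m_1 m_2 \;=\; 2 \cdot \frac{n_1 d_1}{2} \cdot \frac{n_2 d_2}{2}
\;=\; \frac{n_1 n_2\, d_1 d_2}{2} \;>\; n_1 n_2
\;\iff\; d_1 d_2 > 2.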


###############################################################################
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method.

Parameters
@@ -264,15 +358,96 @@ def _randomwalkkernel_fp(G1, G2, node_label, edge_label):
kernel : float
Kernel between 2 graphs.
"""

dpg = nx.tensor_product(G1, G2) # direct product graph
X = dlyap(A, Q, C)

return kernel
Kmatrix = np.zeros((len(Gn), len(Gn)))
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1:
# # this is faster for unlabeled graphs. @todo: why?
# if q == None:
# # don't normalize adjacency matrices if q is a uniform vector. Note
# # A_wave_list actually contains the transposes of the adjacency matrices.
# A_wave_list = [
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
# ]
# if p == None: # p is uniform distribution as default.
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# # use uniform distribution if there is no prior knowledge.
# nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
# p_times_uni = 1 / nb_pd
# w_times = kron(A_wave_list[i], A_wave_list[j]).todense()
# p_times = np.full((nb_pd, 1), p_times_uni)
# x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times))
# # use uniform distribution if there is no prior knowledge.
# q_times = np.full((1, nb_pd), p_times_uni)
# Kmatrix[i][j] = np.dot(q_times, x)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# else:
# reindex nodes using consecutive integers for convenience of kernel calculation.
Gn = [nx.convert_node_labels_to_integers(
g, first_label=0, label_attribute='label_orignal') for g in tqdm(
Gn, desc='reindex vertices', file=sys.stdout)]
if p == None and q == None: # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)
return Kmatrix


def _randomwalkkernel_spectral(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using spectral decomposition method.
def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
edge_label, lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)


def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
edge_kernels, edge_label, lmda):
# First, compute kernels between all pairs of nodes, a method borrowed
# from FCSP. It is faster than computing all edge kernels directly
# when $d_1d_2>2$, where $d_1$ and $d_2$ are the vertex degrees of the
# graphs compared, which covers most of the cases we encountered (see
# the cost note after _cg_labled_do). For very sparse graphs, this
# would be slow.
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
# Compute weight matrix of the direct product graph.
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
edge_kernels, edge_label)
# use uniform distribution if there is no prior knowledge.
p_times_uni = 1 / w_dim
p_times = np.full((w_dim, 1), p_times_uni)
x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times),
xtol=1e-06, maxiter=1000)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, w_dim), p_times_uni)
return np.dot(q_times, x)


def func_fp(x, p_times, lmda, w_times):
return p_times + lmda * np.dot(w_times, x)
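For reference, func_fp is the map handed to scipy.optimize.fixed_point, and its fixed point is exactly the linear-system solution used by the conjugate gradient variant:

x = p + \lambda W x \;\iff\; (I - \lambda W)\,x = p \;\iff\; x = (I - \lambda W)^{-1} p,

with the iteration x_{t+1} = p + \lambda W x_t converging whenever \lambda \lVert W \rVert < 1, the same condition under which the geometric walk series exists.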


###############################################################################
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 unlabeled graphs using
spectral decomposition method. Labels will be ignored.

Parameters
----------
@@ -288,13 +463,72 @@ def _randomwalkkernel_spectral(G1, G2, node_label, edge_label):
kernel : float
Kernel between 2 graphs.
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))

if q == None:
# precompute the spectral decomposition of each graph.
P_list = []
D_list = []
for G in tqdm(Gn, desc='spectral decompose', file=sys.stdout):
# don't normalize adjacency matrices if q is a uniform vector. Note
# A actually is the transpose of the adjacency matrix.
A = nx.adjacency_matrix(G, eweight).todense().transpose()
ew, ev = np.linalg.eig(A)
D_list.append(ew)
P_list.append(ev)
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?

if p == None: # p is uniform distribution as default.
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn]
# q_T_list = [q.T for q in q_list]
def init_worker(q_T_toshare, P_toshare, D_toshare):
global G_q_T, G_P, G_D
G_q_T = q_T_toshare
G_P = P_toshare
G_D = D_toshare
do_partial = partial(wrapper_sd_do, weight, sub_kernel)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# result = _sd_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j],
# D_list[i], D_list[j], weight, sub_kernel)
# Kmatrix[i][j] = result
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
return Kmatrix


dpg = nx.tensor_product(G1, G2) # direct product graph
X = dlyap(A, Q, C)
def wrapper_sd_do(weight, sub_kernel, itr):
i = itr[0]
j = itr[1]
return i, j, _sd_do(G_q_T[i], G_q_T[j], G_P[i], G_P[j], G_D[i], G_D[j],
weight, sub_kernel)

return kernel

def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
# use uniform distribution if there is no prior knowledge.
kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
# @todo: this is not needed when p = q (kr = kl.T) for undirected graphs
# kr = kron(np.dot(P_inv_list[i], q_list[i]), np.dot(P_inv_list[j], q_list[j])).todense()
if sub_kernel == 'exp':
D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
kmiddle = np.diag(np.exp(weight * D_diag))
elif sub_kernel == 'geo':
D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
# the weight appears only once in (I - weight * D).
kmiddle = np.identity(len(D_diag)) - weight * np.diag(D_diag)
kmiddle = np.linalg.inv(kmiddle)
return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0]
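What _sd_do evaluates, written out (this relies on the undirected-graph assumption in the warning above, so that each P is orthogonal and the right-hand factor can be taken as kl.T, per the @todo comment):

k(G_1, G_2) = \big(q_1^\top P_1 \otimes q_2^\top P_2\big)\;
f_\lambda(D_1 \otimes D_2)\;
\big(q_1^\top P_1 \otimes q_2^\top P_2\big)^\top,

f_\lambda(\mu) = e^{\lambda \mu} \;\text{for 'exp'}, \qquad
f_\lambda(\mu) = \frac{1}{1 - \lambda \mu} \;\text{for 'geo'},

where f_\lambda is applied entrywise to the eigenvalue products d_1 d_2 (the D_diag array in the code).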


###############################################################################
def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.

@@ -312,8 +546,289 @@ def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
kernel : float
Kernel between 2 graphs.
"""
pass

dpg = nx.tensor_product(G1, G2) # direct product graph
X = dlyap(A, Q, C)

return kernel
###############################################################################
def getLabels(Gn, node_label, edge_label, directed):
"""Get symbolic labels of a graph dataset, where vertex labels are dealt
with by concatenating them to the edge labels of adjacent edges.
"""
label_list = []
label_set = set()
for g in Gn:
label_g = {}
for e in g.edges(data=True):
nl1 = g.node[e[0]][node_label]
nl2 = g.node[e[1]][node_label]
if not directed and nl1 > nl2:
nl1, nl2 = nl2, nl1
label = (nl1, e[2][edge_label], nl2)
label_g[(e[0], e[1])] = label
label_list.append(label_g)
label_set = set([l for lg in label_list for l in lg.values()])
return label_list, len(label_set)


def filterGramMatrix(gmt, label_dict, label, directed):
"""Compute (the transpose of) the Gram matrix filtered by a label.
"""
gmf = np.zeros(gmt.shape)
for (n1, n2), l in label_dict.items():
if l == label:
gmf[n2, n1] = gmt[n2, n1]
if not directed:
gmf[n1, n2] = gmt[n1, n2]
return gmf


def computeVK(g1, g2, ds_attrs, node_kernels, node_label):
'''Compute vertex kernels between vertices of two graphs.
'''
vk_dict = {} # shortest path matrices dict
if ds_attrs['node_labeled']:
# node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled
else:
kn = node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
# node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else:
pass
return vk_dict
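computeVK expects node_kernels to be a dict keyed 'symb', 'nsymb' and 'mix', matching the three call shapes above. A plausible instantiation follows; deltakernel is confirmed by the pygraph.utils.kernels import in marginalizedKernel.py, while the Gaussian and its width are purely illustrative.

import numpy as np

def deltakernel(x, y):
    # Kronecker delta on symbolic labels (stand-in for the version in
    # pygraph.utils.kernels).
    return 1.0 if x == y else 0.0

def gaussiankernel(x, y, gamma=1.0):
    # RBF kernel on non-symbolic attribute vectors (illustrative choice).
    d = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.exp(-gamma * np.dot(d, d))

node_kernels = {
    'symb': deltakernel,
    'nsymb': gaussiankernel,
    # 'mix' combines both label types, matching the call
    # kn(n1_symb, n2_symb, n1_attrs, n2_attrs) in computeVK.
    'mix': lambda s1, s2, a1, a2: deltakernel(s1, s2) * gaussiankernel(a1, a2),
}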


def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):
'''Compute weight matrix of the direct product graph.
'''
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
w_times = np.zeros((w_dim, w_dim))
if vk_dict: # node labeled
if ds_attrs['is_directed']:
if ds_attrs['edge_labeled']:
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
else:
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* vk_dict[(e1[1], e2[1])]
else: # undirected
if ds_attrs['edge_labeled']:
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else:
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else: # node unlabeled
if ds_attrs['is_directed']:
if ds_attrs['edge_labeled']:
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
else:
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = 1
else: # undirected
if ds_attrs['edge_labeled']:
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else:
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = 1
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
return w_times, w_dim
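# The flattened indexing above encodes the direct-product vertex (u1, u2) as
# row u1 * n2 + u2 of w_times. A tiny self-check of that mapping (illustrative
# only; n1, n2 stand for nx.number_of_nodes(g1), nx.number_of_nodes(g2)):
#
#     n1, n2 = 3, 4
#     for u1 in range(n1):
#         for u2 in range(n2):
#             flat = u1 * n2 + u2
#             assert (flat // n2, flat % n2) == (u1, u2)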

+ 842
- 0
pygraph/kernels/rwalk_sym.py View File

@@ -0,0 +1,842 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:53:57 2018

@author: ljia
@references: S.V.N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and
Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research,
11(Apr):1201–1242, 2010.
"""

import sys
sys.path.insert(0, "../")
import time
from functools import partial
from tqdm import tqdm

import networkx as nx
import numpy as np
from scipy.sparse import identity, kron
from scipy.sparse.linalg import cg
from scipy.optimize import fixed_point

from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.parallel import parallel_gm

def randomwalkkernel(*args,
# params for all method.
compute_method=None,
weight=1,
p=None,
q=None,
edge_weight=None,
# params for conjugate and fp method.
node_kernels=None,
edge_kernels=None,
node_label='atom',
edge_label='bond_type',
# params for spectral method.
sub_kernel=None,
n_jobs=None):
"""Calculate random walk graph kernels.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
    compute_method : string
        Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'.
    weight : float
        Weight (decay) factor of the walks.

Return
------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the random walk kernel between 2 graphs.
"""
compute_method = compute_method.lower()
Gn = args[0] if len(args) == 1 else [args[0], args[1]]

eweight = None
    if edge_weight is None:
        print('\n No edge weight specified. Set all weights to 1.\n')
    else:
        try:
            some_weight = list(
                nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
            if isinstance(some_weight, (float, int)):
                eweight = edge_weight
            else:
                print(
                    '\n Edge weight with name %s is not float or integer. Set all weights to 1.\n'
                    % edge_weight)
        except Exception:
            print(
                '\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n'
                % edge_weight)

ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
'edge_attr_dim', 'is_directed'],
node_label=node_label,
edge_label=edge_label)
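    # this symbolic-only variant forces the non-symbolic attribute dimensions
    # to 0, so computeVK/computeW never select the 'nsymb' or 'mix' kernels.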
ds_attrs['node_attr_dim'] = 0
ds_attrs['edge_attr_dim'] = 0
# remove graphs with no edges, as no walk can be found in their structures,
# so the weight matrix between such a graph and itself might be zero.
len_gn = len(Gn)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
if len(Gn) != len_gn:
print('\n %d graphs are removed as they don\'t contain edges.\n' %
(len_gn - len(Gn)))

start_time = time.time()
# # get vertex and edge concatenated labels for each graph
# label_list, d = getLabels(Gn, node_label, edge_label, ds_attrs['is_directed'])
# gmf = filterGramMatrix(A_wave_list[0], label_list[0], ('C', '0', 'O'), ds_attrs['is_directed'])

if compute_method == 'sylvester':
import warnings
warnings.warn('All labels are ignored.')
Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs)

elif compute_method == 'conjugate':
Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs,
node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs)
elif compute_method == 'fp':
Kmatrix = _fixed_point(Gn, weight, p, q, ds_attrs, node_kernels,
edge_kernels, node_label, edge_label,
eweight, n_jobs)

elif compute_method == 'spectral':
import warnings
warnings.warn('All labels are ignored. Only works for undirected graphs.')
Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs)

    elif compute_method == 'kron':
        Kmatrix = np.zeros((len(Gn), len(Gn)))
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j],
                                                       node_label, edge_label)
                Kmatrix[j][i] = Kmatrix[i][j]
else:
raise Exception(
'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
)

run_time = time.time() - start_time
print(
"\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time, idx
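# A minimal usage sketch (illustrative only: the delta kernels below are toy
# stand-ins defined here, not helpers of this module):
#
#     import networkx as nx
#
#     def deltakernel(x, y):              # kernel for symbolic labels
#         return 1 if x == y else 0
#
#     def mixkernel(sl1, sl2, al1, al2):  # symbolic + non-symbolic (toy)
#         return deltakernel(sl1, sl2)
#
#     kdict = {'symb': deltakernel, 'nsymb': deltakernel, 'mix': mixkernel}
#     Gn = [nx.path_graph(4), nx.cycle_graph(5)]
#     for g in Gn:
#         nx.set_node_attributes(g, {n: 'C' for n in g.nodes()}, 'atom')
#         nx.set_edge_attributes(g, {e: '1' for e in g.edges()}, 'bond_type')
#     Kmatrix, run_time, idx = randomwalkkernel(
#         Gn, compute_method='conjugate', weight=1e-3,
#         node_kernels=kdict, edge_kernels=kdict, n_jobs=2)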


###############################################################################
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))

    if q is None:
        # don't normalize adjacency matrices if q is a uniform vector. Note
        # that A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_list = [
nx.adjacency_matrix(G, eweight).todense().transpose() for G in tqdm(
Gn, desc='compute adjacency matrices', file=sys.stdout)
]
# # normalized adjacency matrices
# A_wave_list = []
# for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
# A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose()
# norm = A_tilde.sum(axis=0)
# norm[norm == 0] = 1
# A_wave_list.append(A_tilde / norm)
        if p is None:  # p is uniform distribution as default.
def init_worker(Awl_toshare):
global G_Awl
G_Awl = Awl_toshare
do_partial = partial(wrapper_se_do, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(A_wave_list,), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# S = lmda * A_wave_list[j]
# T_t = A_wave_list[i]
# # use uniform distribution if there is no prior knowledge.
# nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
# p_times_uni = 1 / nb_pd
# M0 = np.full((len(A_wave_list[j]), len(A_wave_list[i])), p_times_uni)
# X = dlyap(S, T_t, M0)
# X = np.reshape(X, (-1, 1), order='F')
# # use uniform distribution if there is no prior knowledge.
# q_times = np.full((1, nb_pd), p_times_uni)
# Kmatrix[i][j] = np.dot(q_times, X)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)

return Kmatrix
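# The init_worker/glbv pattern above passes the big read-only list to every
# worker process once, via a process-global, instead of pickling it into each
# task. A stripped-down sketch of the idea (hypothetical toy worker, not
# parallel_gm's actual signature):
#
#     from multiprocessing import Pool
#
#     def _init_worker(big_list):
#         global G_DATA   # runs once per worker; stashes the shared data
#         G_DATA = big_list
#
#     def _task(ij):
#         i, j = ij
#         return i, j, G_DATA[i] * G_DATA[j]   # toy pairwise computation
#
#     if __name__ == '__main__':
#         with Pool(2, initializer=_init_worker, initargs=([1., 2., 3.],)) as pool:
#             print(list(pool.imap_unordered(_task, [(0, 1), (0, 2), (1, 2)])))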


def wrapper_se_do(lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _se_do(G_Awl[i], G_Awl[j], lmda)


def _se_do(A_wave1, A_wave2, lmda):
from control import dlyap
S = lmda * A_wave2
T_t = A_wave1
# use uniform distribution if there is no prior knowledge.
nb_pd = len(A_wave1) * len(A_wave2)
p_times_uni = 1 / nb_pd
M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni)
X = dlyap(S, T_t, M0)
X = np.reshape(X, (-1, 1), order='F')
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, nb_pd), p_times_uni)
return np.dot(q_times, X)
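# Sanity-check sketch for _se_do: on small graphs the Sylvester-equation result
# should agree, up to numerical error, with the direct geometric random walk
# kernel q^T (I - lmda * (A1 (x) A2))^{-1} p that it accelerates (uniform p and
# q assumed, as in _se_do itself):
#
#     def rw_kernel_direct(A_wave1, A_wave2, lmda):
#         n = len(A_wave1) * len(A_wave2)
#         w_times = kron(A_wave1, A_wave2).todense()
#         p_times = np.full((n, 1), 1 / n)
#         q_times = np.full((1, n), 1 / n)
#         return np.dot(np.dot(q_times, np.linalg.inv(
#             np.identity(n) - lmda * w_times)), p_times)[0, 0]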


###############################################################################
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1:
# # this is faster from unlabeled graphs. @todo: why?
# if q == None:
# # don't normalize adjacency matrices if q is a uniform vector. Note
# # A_wave_list accually contains the transposes of the adjacency matrices.
# A_wave_list = [
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
# ]
# if p == None: # p is uniform distribution as default.
# def init_worker(Awl_toshare):
# global G_Awl
# G_Awl = Awl_toshare
# do_partial = partial(wrapper_cg_unlabled_do, lmda)
# parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
# glbv=(A_wave_list,), n_jobs=n_jobs)
# else:
# reindex nodes using consecutive integers for convenience of kernel calculation.
Gn = [nx.convert_node_labels_to_integers(
g, first_label=0, label_attribute='label_orignal') for g in tqdm(
Gn, desc='reindex vertices', file=sys.stdout)]
    if p is None and q is None:  # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# result = _cg_labled_do(Gn[i], Gn[j], ds_attrs, node_kernels,
# node_label, edge_kernels, edge_label, lmda)
# Kmatrix[i][j] = result
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
return Kmatrix


def wrapper_cg_unlabled_do(lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _cg_unlabled_do(G_Awl[i], G_Awl[j], lmda)


def _cg_unlabled_do(A_wave1, A_wave2, lmda):
nb_pd = len(A_wave1) * len(A_wave2)
p_times_uni = 1 / nb_pd
w_times = kron(A_wave1, A_wave2).todense()
A = identity(w_times.shape[0]) - w_times * lmda
b = np.full((nb_pd, 1), p_times_uni)
x, _ = cg(A, b)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, nb_pd), p_times_uni)
return np.dot(q_times, x)


def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
edge_label, lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)


def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
edge_kernels, edge_label, lmda):
    # First, compute kernels between all pairs of nodes, a method borrowed
    # from FCSP. It is faster than computing all edge kernels directly
    # when $d_1d_2>2$, where $d_1$ and $d_2$ are the vertex degrees of the
    # two graphs compared, which covers most cases we encountered. For very
    # sparse graphs, this would be slow.
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
# Compute weight matrix of the direct product graph.
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
edge_kernels, edge_label)
# use uniform distribution if there is no prior knowledge.
p_times_uni = 1 / w_dim
A = identity(w_times.shape[0]) - w_times * lmda
b = np.full((w_dim, 1), p_times_uni)
x, _ = cg(A, b)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, w_dim), p_times_uni)
return np.dot(q_times, x)
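# Both cg calls above solve the linear system (I - lmda * W) x = p arising from
# the geometric random walk kernel k(G1, G2) = q^T (I - lmda * W)^{-1} p, so
# each iteration only needs a matrix-vector product instead of an explicit
# matrix inverse.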


###############################################################################
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""

Kmatrix = np.zeros((len(Gn), len(Gn)))
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1:
# # this is faster from unlabeled graphs. @todo: why?
# if q == None:
# # don't normalize adjacency matrices if q is a uniform vector. Note
# # A_wave_list accually contains the transposes of the adjacency matrices.
# A_wave_list = [
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
# ]
# if p == None: # p is uniform distribution as default.
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# # use uniform distribution if there is no prior knowledge.
# nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
# p_times_uni = 1 / nb_pd
# w_times = kron(A_wave_list[i], A_wave_list[j]).todense()
# p_times = np.full((nb_pd, 1), p_times_uni)
# x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times))
# # use uniform distribution if there is no prior knowledge.
# q_times = np.full((1, nb_pd), p_times_uni)
# Kmatrix[i][j] = np.dot(q_times, x)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# else:
# reindex nodes using consecutive integers for convenience of kernel calculation.
Gn = [nx.convert_node_labels_to_integers(
g, first_label=0, label_attribute='label_orignal') for g in tqdm(
Gn, desc='reindex vertices', file=sys.stdout)]
    if p is None and q is None:  # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)
return Kmatrix


def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
edge_label, lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)


def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
edge_kernels, edge_label, lmda):
    # First, compute kernels between all pairs of nodes, a method borrowed
    # from FCSP. It is faster than computing all edge kernels directly
    # when $d_1d_2>2$, where $d_1$ and $d_2$ are the vertex degrees of the
    # two graphs compared, which covers most cases we encountered. For very
    # sparse graphs, this would be slow.
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
# Compute weight matrix of the direct product graph.
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
edge_kernels, edge_label)
# use uniform distribution if there is no prior knowledge.
p_times_uni = 1 / w_dim
p_times = np.full((w_dim, 1), p_times_uni)
x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times),
xtol=1e-06, maxiter=1000)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, w_dim), p_times_uni)
return np.dot(q_times, x)


def func_fp(x, p_times, lmda, w_times):
    # fixed-point map x -> p + lmda * W x of the geometric random walk kernel.
    return p_times + lmda * np.dot(w_times, x)
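# scipy.optimize.fixed_point iterates this map (with Steffensen's 'del2'
# acceleration by default); it converges to the same x = (I - lmda * W)^{-1} p
# as the conjugate gradient variant whenever lmda * ||W|| < 1.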


###############################################################################
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 unlabeled graphs using
spectral decomposition method. Labels will be ignored.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))

    if q is None:
# precompute the spectral decomposition of each graph.
P_list = []
D_list = []
for G in tqdm(Gn, desc='spectral decompose', file=sys.stdout):
            # don't normalize adjacency matrices if q is a uniform vector. Note
            # that A is actually the transpose of the adjacency matrix.
A = nx.adjacency_matrix(G, eweight).todense().transpose()
ew, ev = np.linalg.eig(A)
D_list.append(ew)
P_list.append(ev)
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?

        if p is None:  # p is uniform distribution as default.
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn]
# q_T_list = [q.T for q in q_list]
def init_worker(q_T_toshare, P_toshare, D_toshare):
global G_q_T, G_P, G_D
G_q_T = q_T_toshare
G_P = P_toshare
G_D = D_toshare
do_partial = partial(wrapper_sd_do, weight, sub_kernel)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# result = _sd_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j],
# D_list[i], D_list[j], weight, sub_kernel)
# Kmatrix[i][j] = result
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
return Kmatrix


def wrapper_sd_do(weight, sub_kernel, itr):
i = itr[0]
j = itr[1]
return i, j, _sd_do(G_q_T[i], G_q_T[j], G_P[i], G_P[j], G_D[i], G_D[j],
weight, sub_kernel)


def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
# use uniform distribution if there is no prior knowledge.
kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
    # @todo: this is not needed when p = q (kr = kl.T) for undirected graphs
# kr = kron(np.dot(P_inv_list[i], q_list[i]), np.dot(P_inv_list[j], q_list[j])).todense()
if sub_kernel == 'exp':
D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
kmiddle = np.diag(np.exp(weight * D_diag))
    elif sub_kernel == 'geo':
        D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
        # geometric series over eigenvalue products: (I - weight * D)^-1
        kmiddle = np.identity(len(D_diag)) - weight * np.diag(D_diag)
        kmiddle = np.linalg.inv(kmiddle)
return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0]
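# With the eigendecompositions A = P diag(D) P^{-1}, the eigenvalues of the
# Kronecker product are the products d1 * d2, so the 'geo' branch evaluates the
# geometric series sum_k weight^k (d1 d2)^k = 1 / (1 - weight * d1 * d2) and
# the 'exp' branch the series exp(weight * d1 * d2) entrywise on the diagonal,
# avoiding any inverse of the full direct product matrix.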


###############################################################################
def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
pass


###############################################################################
def getLabels(Gn, node_label, edge_label, directed):
"""Get symbolic labels of a graph dataset, where vertex labels are dealt
with by concatenating them to the edge labels of adjacent edges.
"""
label_list = []
label_set = set()
for g in Gn:
label_g = {}
for e in g.edges(data=True):
nl1 = g.node[e[0]][node_label]
nl2 = g.node[e[1]][node_label]
if not directed and nl1 > nl2:
nl1, nl2 = nl2, nl1
label = (nl1, e[2][edge_label], nl2)
label_g[(e[0], e[1])] = label
label_list.append(label_g)
label_set = set([l for lg in label_list for l in lg.values()])
return label_list, len(label_set)


def filterGramMatrix(gmt, label_dict, label, directed):
"""Compute (the transpose of) the Gram matrix filtered by a label.
"""
gmf = np.zeros(gmt.shape)
for (n1, n2), l in label_dict.items():
if l == label:
gmf[n2, n1] = gmt[n2, n1]
if not directed:
gmf[n1, n2] = gmt[n1, n2]
return gmf


def computeVK(g1, g2, ds_attrs, node_kernels, node_label):
'''Compute vertex kernels between vertices of two graphs.
'''
    vk_dict = {}  # vertex kernels dict
if ds_attrs['node_labeled']:
        # node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled
else:
kn = node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
        # node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else:
pass
return vk_dict


def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):
'''Compute weight matrix of the direct product graph.
'''
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
w_times = np.zeros((w_dim, w_dim))
if vk_dict: # node labeled
if ds_attrs['is_directed']:
if ds_attrs['edge_labeled']:
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
else:
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* vk_dict[(e1[1], e2[1])]
else: # undirected
if ds_attrs['edge_labeled']:
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else:
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else: # node unlabeled
if ds_attrs['is_directed']:
if ds_attrs['edge_labeled']:
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
else:
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = 1
else: # undirected
if ds_attrs['edge_labeled']:
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else:
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = 1
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
return w_times, w_dim

+ 27
- 32
pygraph/kernels/spKernel.py View File

@@ -6,7 +6,7 @@ Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.

import sys
import time
from itertools import combinations_with_replacement, product
from itertools import product
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm
@@ -16,6 +16,7 @@ import numpy as np

from pygraph.utils.utils import getSPGraph
from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.parallel import parallel_gm
sys.path.insert(0, "../")

def spkernel(*args,
@@ -90,14 +91,14 @@ def spkernel(*args,
# get shortest path graphs of Gn
getsp_partial = partial(wrapper_getSPGraph, weight)
itr = zip(Gn, range(0, len(Gn)))
if len(Gn) < 1000 * n_jobs:
if len(Gn) < 100 * n_jobs:
# # use default chunksize as pool.map when iterable is less than 100
# chunksize, extra = divmod(len(Gn), n_jobs * 4)
# if extra:
# chunksize += 1
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 1000
chunksize = 100
for i, g in tqdm(
pool.imap_unordered(getsp_partial, itr, chunksize),
desc='getting sp graphs', file=sys.stdout):
@@ -107,7 +108,7 @@ def spkernel(*args,
# # ---- direct running, normally use single CPU core. ----
# for i in tqdm(range(len(Gn)), desc='getting sp graphs', file=sys.stdout):
# i, Gn[i] = wrap_getSPGraph(Gn, weight, i)
# i, Gn[i] = wrapper_getSPGraph(weight, (Gn[i], i))

# # ---- use pool.map to parallel ----
# result_sp = pool.map(getsp_partial, range(0, len(Gn)))
@@ -142,23 +143,13 @@ def spkernel(*args,
Kmatrix = np.zeros((len(Gn), len(Gn)))

# ---- use pool.imap_unordered to parallel and track progress. ----
pool = Pool(n_jobs)
do_partial = partial(wrapper_sp_do, ds_attrs, node_label, node_kernels)
itr = zip(combinations_with_replacement(Gn, 2),
combinations_with_replacement(range(0, len(Gn)), 2))
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
if len_itr < 1000 * n_jobs:
chunksize = int(len_itr / n_jobs) + 1
else:
chunksize = 1000
for i, j, kernel in tqdm(
pool.imap_unordered(do_partial, itr, chunksize),
desc='calculating kernels',
file=sys.stdout):
Kmatrix[i][j] = kernel
Kmatrix[j][i] = kernel
pool.close()
pool.join()
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_sp_do, ds_attrs, node_label, node_kernels)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)


# # ---- use pool.map to parallel. ----
# # result_perf = pool.map(do_partial, itr)
@@ -186,9 +177,10 @@ def spkernel(*args,
# Kmatrix[i[1]][i[0]] = i[2]

# # ---- direct running, normally use single CPU core. ----
# from itertools import combinations_with_replacement
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# i, j, kernel = spkernel_do(Gn, ds_attrs, node_label, node_kernels, gs)
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels)
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel

@@ -205,11 +197,11 @@ def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):
kernel = 0

    # compute kernels between all pairs of vertices first, method borrowed from FCSP.
    vk_dict = {}  # vertex kernels dict
if ds_attrs['node_labeled']:
        # node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
            vk_dict = {}  # vertex kernels dict
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
@@ -218,7 +210,6 @@ def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):
# node symb labeled
else:
kn = node_kernels['symb']
            vk_dict = {}  # vertex kernels dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
@@ -227,7 +218,6 @@ def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):
        # node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
            vk_dict = {}  # vertex kernels dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
@@ -292,12 +282,17 @@ def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):
return kernel


def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr_item):
g1 = itr_item[0][0]
g2 = itr_item[0][1]
i = itr_item[1][0]
j = itr_item[1][1]
return i, j, spkernel_do(g1, g2, ds_attrs, node_label, node_kernels)
def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr):
i = itr[0]
j = itr[1]
return i, j, spkernel_do(G_gn[i], G_gn[j], ds_attrs, node_label, node_kernels)

#def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr_item):
# g1 = itr_item[0][0]
# g2 = itr_item[0][1]
# i = itr_item[1][0]
# j = itr_item[1][1]
# return i, j, spkernel_do(g1, g2, ds_attrs, node_label, node_kernels)


def wrapper_getSPGraph(weight, itr_item):


+ 200
- 0
pygraph/kernels/sp_sym.py View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 21 18:02:00 2018

@author: ljia
"""

import sys
import time
from itertools import product
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm

import networkx as nx
import numpy as np

from pygraph.utils.utils import getSPGraph
from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.parallel import parallel_gm
sys.path.insert(0, "../")

def spkernel(*args,
node_label='atom',
edge_weight=None,
node_kernels=None,
n_jobs=None):
"""Calculate shortest-path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_weight : string
Edge attribute name corresponding to the edge weight.
node_kernels: dict
A dictionary of kernel functions for nodes, including 3 items: 'symb'
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
for both labels. The first 2 functions take two node labels as
        parameters, and the 'mix' function takes 4 parameters, a symbolic and a
        non-symbolic label for each of the two nodes. Each label is in the form
        of a 2-D array (n_samples, n_features). Each function returns a
        number as the kernel value. Ignored when nodes are unlabeled.

Return
------
Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the sp kernel between 2 graphs.
"""
# pre-process
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
weight = None
if edge_weight is None:
        print('\n No edge weight specified. Set all weights to 1.\n')
else:
try:
some_weight = list(
nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
if isinstance(some_weight, (float, int)):
weight = edge_weight
else:
                print(
                    '\n Edge weight with name %s is not float or integer. Set all weights to 1.\n'
                    % edge_weight)
        except Exception:
            print(
                '\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n'
                % edge_weight)
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'node_attr_dim', 'is_directed'],
node_label=node_label)
ds_attrs['node_attr_dim'] = 0

# remove graphs with no edges, as no sp can be found in their structures,
# so the kernel between such a graph and itself will be zero.
len_gn = len(Gn)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
if len(Gn) != len_gn:
print('\n %d graphs are removed as they don\'t contain edges.\n' %
(len_gn - len(Gn)))

start_time = time.time()

pool = Pool(n_jobs)
# get shortest path graphs of Gn
getsp_partial = partial(wrapper_getSPGraph, weight)
itr = zip(Gn, range(0, len(Gn)))
if len(Gn) < 100 * n_jobs:
# # use default chunksize as pool.map when iterable is less than 100
# chunksize, extra = divmod(len(Gn), n_jobs * 4)
# if extra:
# chunksize += 1
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 100
for i, g in tqdm(
pool.imap_unordered(getsp_partial, itr, chunksize),
desc='getting sp graphs', file=sys.stdout):
Gn[i] = g
pool.close()
pool.join()

Kmatrix = np.zeros((len(Gn), len(Gn)))

# ---- use pool.imap_unordered to parallel and track progress. ----
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_sp_do, ds_attrs, node_label, node_kernels)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)

run_time = time.time() - start_time
print(
"\n --- shortest path kernel matrix of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time, idx


def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):
kernel = 0

    # compute kernels between all pairs of vertices first, method borrowed from FCSP.
    vk_dict = {}  # vertex kernels dict
if ds_attrs['node_labeled']:
        # node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled
else:
kn = node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
        # node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else:
for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kernel += 1
return kernel

# compute graph kernels
if ds_attrs['is_directed']:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1],
e2[1])]
kn1 = nk11 * nk22
kernel += kn1
else:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
                # each edge walk is counted twice, once from each of its end nodes.
nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
e1[0], e2[1])], vk_dict[(e1[1],
e2[0])], vk_dict[(e1[1],
e2[1])]
kn1 = nk11 * nk22
kn2 = nk12 * nk21
kernel += kn1 + kn2

return kernel
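# Note: g1 and g2 here are the shortest-path graphs produced by getSPGraph,
# whose edges presumably carry the shortest-path length between their endpoints
# in the 'cost' attribute compared above; a pair of paths contributes only when
# those lengths match.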


def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr):
i = itr[0]
j = itr[1]
return i, j, spkernel_do(G_gn[i], G_gn[j], ds_attrs, node_label, node_kernels)


def wrapper_getSPGraph(weight, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, getSPGraph(g, edge_weight=weight)

+ 464
- 0
pygraph/kernels/ssp_sym.py View File

@@ -0,0 +1,464 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:42:48 2018

@author: ljia
"""

import sys
import time
from itertools import combinations, product
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm

import networkx as nx
import numpy as np

from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.parallel import parallel_gm

sys.path.insert(0, "../")


def structuralspkernel(*args,
node_label='atom',
edge_weight=None,
edge_label='bond_type',
node_kernels=None,
edge_kernels=None,
n_jobs=None):
"""Calculate mean average structural shortest path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_weight : string
Edge attribute name corresponding to the edge weight.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
node_kernels: dict
A dictionary of kernel functions for nodes, including 3 items: 'symb'
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
for both labels. The first 2 functions take two node labels as
        parameters, and the 'mix' function takes 4 parameters, a symbolic and a
        non-symbolic label for each of the two nodes. Each label is in the form
        of a 2-D array (n_samples, n_features). Each function returns a number
        as the kernel value. Ignored when nodes are unlabeled.
edge_kernels: dict
A dictionary of kernel functions for edges, including 3 items: 'symb'
for symbolic edge labels, 'nsymb' for non-symbolic edge labels, 'mix'
for both labels. The first 2 functions take two edge labels as
        parameters, and the 'mix' function takes 4 parameters, a symbolic and a
        non-symbolic label for each of the two edges. Each label is in the form
        of a 2-D array (n_samples, n_features). Each function returns a number
        as the kernel value. Ignored when edges are unlabeled.

Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the mean average structural
        shortest path kernel between 2 graphs.
"""
# pre-process
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
weight = None
if edge_weight is None:
        print('\n No edge weight specified. Set all weights to 1.\n')
else:
try:
some_weight = list(
nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
if isinstance(some_weight, (float, int)):
weight = edge_weight
else:
                print(
                    '\n Edge weight with name %s is not float or integer. Set all weights to 1.\n'
                    % edge_weight)
        except Exception:
            print(
                '\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n'
                % edge_weight)
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
'edge_attr_dim', 'is_directed'],
node_label=node_label, edge_label=edge_label)
ds_attrs['node_attr_dim'] = 0
ds_attrs['edge_attr_dim'] = 0

start_time = time.time()

# get shortest paths of each graph in Gn
splist = [None] * len(Gn)
pool = Pool(n_jobs)
# get shortest path graphs of Gn
getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed'])
itr = zip(Gn, range(0, len(Gn)))
if len(Gn) < 100 * n_jobs:
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 100
# chunksize = 300 # int(len(list(itr)) / n_jobs)
for i, sp in tqdm(
pool.imap_unordered(getsp_partial, itr, chunksize),
desc='getting shortest paths',
file=sys.stdout):
splist[i] = sp
# time.sleep(10)
pool.close()
pool.join()
# # get shortest paths of each graph in Gn
# splist = [[] for _ in range(len(Gn))]
# # get shortest path graphs of Gn
# getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed'])
# itr = zip(Gn, range(0, len(Gn)))
# if len(Gn) < 1000 * n_jobs:
# chunksize = int(len(Gn) / n_jobs) + 1
# else:
# chunksize = 1000
# # chunksize = 300 # int(len(list(itr)) / n_jobs)
# from contextlib import closing
# with closing(Pool(n_jobs)) as pool:
## for i, sp in tqdm(
# res = pool.imap_unordered(getsp_partial, itr, 10)
## desc='getting shortest paths',
## file=sys.stdout):
## splist[i] = sp
## time.sleep(10)
# pool.close()
# pool.join()
# ss = 0
# ss += sys.getsizeof(splist)
# for spss in splist:
# ss += sys.getsizeof(spss)
# for spp in spss:
# ss += sys.getsizeof(spp)
# time.sleep(20)
# # ---- direct running, normally use single CPU core. ----
# splist = []
# for g in tqdm(Gn, desc='getting sp graphs', file=sys.stdout):
# splist.append(get_shortest_paths(g, weight, ds_attrs['is_directed']))

# # ---- only for the Fast Computation of Shortest Path Kernel (FCSP)
# sp_ml = [0] * len(Gn) # shortest path matrices
# for i in result_sp:
# sp_ml[i[0]] = i[1]
# edge_x_g = [[] for i in range(len(sp_ml))]
# edge_y_g = [[] for i in range(len(sp_ml))]
# edge_w_g = [[] for i in range(len(sp_ml))]
# for idx, item in enumerate(sp_ml):
# for i1 in range(len(item)):
# for i2 in range(i1 + 1, len(item)):
# if item[i1, i2] != np.inf:
# edge_x_g[idx].append(i1)
# edge_y_g[idx].append(i2)
# edge_w_g[idx].append(item[i1, i2])
# print(len(edge_x_g[0]))
# print(len(edge_y_g[0]))
# print(len(edge_w_g[0]))

Kmatrix = np.zeros((len(Gn), len(Gn)))
# ---- use pool.imap_unordered to parallel and track progress. ----
def init_worker(spl_toshare, gs_toshare):
global G_spl, G_gs
G_spl = spl_toshare
G_gs = gs_toshare
do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
node_kernels, edge_kernels)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(splist, Gn), n_jobs=n_jobs)

# # ---- use pool.imap_unordered to parallel and track progress. ----
# pool = Pool(n_jobs)
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
# if len_itr < 1000 * n_jobs:
# chunksize = int(len_itr / n_jobs) + 1
# else:
# chunksize = 1000
# for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, chunksize),
# desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()
# # ---- use pool.map to parallel. ----
# pool = Pool(n_jobs)
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# for i, j, kernel in tqdm(
# pool.map(do_partial, itr), desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()

# # ---- use pool.imap_unordered to parallel and track progress. ----
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
# if len_itr < 1000 * n_jobs:
# chunksize = int(len_itr / n_jobs) + 1
# else:
# chunksize = 1000
# from contextlib import closing
# with closing(Pool(n_jobs)) as pool:
# for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, 1000),
# desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()


# # ---- direct running, normally use single CPU core. ----
# from itertools import combinations_with_replacement
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# kernel = structuralspkernel_do(Gn[i], Gn[j], splist[i], splist[j],
# ds_attrs, node_label, edge_label, node_kernels, edge_kernels)
## if(kernel > 1):
## print("error here ")
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel

run_time = time.time() - start_time
    print(
        "\n --- structural shortest path kernel matrix of size %d built in %s seconds ---"
        % (len(Gn), run_time))

return Kmatrix, run_time


def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
node_kernels, edge_kernels):
kernel = 0

    # First, compute kernels between all pairs of vertices, method borrowed from FCSP.
    vk_dict = {}  # vertex kernels dict
if ds_attrs['node_labeled']:
        # node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled
else:
kn = node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
        # node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else:
pass

    # Then, compute kernels between all pairs of edges, an idea that extends
    # FCSP. It suits sparse graphs, which covers most cases we encountered.
    # For dense graphs, this would be slow.
ek_dict = {} # dict of edge kernels
if ds_attrs['edge_labeled']:
        # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
else:
        # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
                    ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
# edge unlabeled
else:
pass

# compute graph kernels
if vk_dict:
if ek_dict:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kpath = vk_dict[(p1[0], p2[0])]
if kpath:
for idx in range(1, len(p1)):
kpath *= vk_dict[(p1[idx], p2[idx])] * \
ek_dict[((p1[idx-1], p1[idx]),
(p2[idx-1], p2[idx]))]
if not kpath:
break
kernel += kpath # add up kernels of all paths
else:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kpath = vk_dict[(p1[0], p2[0])]
if kpath:
for idx in range(1, len(p1)):
kpath *= vk_dict[(p1[idx], p2[idx])]
if not kpath:
break
kernel += kpath # add up kernels of all paths
else:
if ek_dict:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
if len(p1) == 0:
kernel += 1
else:
kpath = 1
for idx in range(0, len(p1) - 1):
kpath *= ek_dict[((p1[idx], p1[idx+1]),
(p2[idx], p2[idx+1]))]
if not kpath:
break
kernel += kpath # add up kernels of all paths
else:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kernel += 1

kernel = kernel / (len(spl1) * len(spl2)) # average over all pairs of paths

# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2]; unfortunately it is slower than the implementation above. ----
# # compute vertex kernel matrix
# try:
# vk_mat = np.zeros((nx.number_of_nodes(g1),
# nx.number_of_nodes(g2)))
# g1nl = enumerate(g1.nodes(data=True))
# g2nl = enumerate(g2.nodes(data=True))
# for i1, n1 in g1nl:
# for i2, n2 in g2nl:
# vk_mat[i1][i2] = kn(
# n1[1][node_label], n2[1][node_label],
# [n1[1]['attributes']], [n2[1]['attributes']])

# range1 = range(0, len(edge_w_g[i]))
# range2 = range(0, len(edge_w_g[j]))
# for i1 in range1:
# x1 = edge_x_g[i][i1]
# y1 = edge_y_g[i][i1]
# w1 = edge_w_g[i][i1]
# for i2 in range2:
# x2 = edge_x_g[j][i2]
# y2 = edge_y_g[j][i2]
# w2 = edge_w_g[j][i2]
# ke = (w1 == w2)
# if ke > 0:
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
# Kmatrix += kn1 + kn2
return kernel


def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels,
edge_kernels, itr):
i = itr[0]
j = itr[1]
return i, j, structuralspkernel_do(G_gs[i], G_gs[j], G_spl[i], G_spl[j],
ds_attrs, node_label, edge_label,
node_kernels, edge_kernels)


def get_shortest_paths(G, weight, directed):
"""Get all shortest paths of a graph.

Parameters
----------
G : NetworkX graph
The graph whose paths are calculated.
weight : string/None
Edge attribute used as the weight to calculate shortest paths.
directed : boolean
Whether the graph is directed.

Return
------
sp : list of list
List of shortest paths of the graph, where each path is represented by a list of nodes.
"""
sp = []
for n1, n2 in combinations(G.nodes(), 2):
try:
spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight))
except nx.NetworkXNoPath: # nodes not connected
# sp.append([])
pass
else:
sp += spltemp
# for undirected graphs, each path is counted twice, once from each of its end nodes.
if not directed:
sp += [sptemp[::-1] for sptemp in spltemp]
# add single nodes as length 0 paths.
sp += [[n] for n in G.nodes()]
return sp


def wrapper_getSP(weight, directed, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, get_shortest_paths(g, weight, directed)
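
To make the FCSP trick above concrete: all pairwise node kernels are precomputed once into a lookup table, then the cached values are multiplied along every pair of equal-length shortest paths. Below is a minimal sketch for node-labeled graphs with unlabeled edges; toy_ssp_kernel, the labels and the delta kernel are illustrative stand-ins, not the library's API.

from itertools import product
import networkx as nx

def delta(x, y):
    return 1.0 if x == y else 0.0

def toy_ssp_kernel(g1, g2, spl1, spl2, node_label='atom'):
    # FCSP idea: compute every pairwise node kernel once, then reuse along paths
    vk = {(u, v): delta(g1.nodes[u][node_label], g2.nodes[v][node_label])
          for u, v in product(g1.nodes(), g2.nodes())}
    kernel = 0.0
    for p1, p2 in product(spl1, spl2):
        if len(p1) != len(p2):
            continue
        kpath = vk[(p1[0], p2[0])]
        for idx in range(1, len(p1)):
            kpath *= vk[(p1[idx], p2[idx])]
            if not kpath:
                break
        kernel += kpath
    return kernel / (len(spl1) * len(spl2))  # average over all path pairs

g1 = nx.path_graph(3)
nx.set_node_attributes(g1, {0: 'C', 1: 'O', 2: 'C'}, 'atom')
spl = [[0], [1], [2], [0, 1], [1, 0], [1, 2], [2, 1], [0, 1, 2], [2, 1, 0]]
print(toy_ssp_kernel(g1, g1, spl, spl))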

+ 66
- 48
pygraph/kernels/structuralspKernel.py View File

@@ -10,7 +10,7 @@ Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360).

import sys
import time
from itertools import combinations, combinations_with_replacement, product
from itertools import combinations, product
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm
@@ -19,6 +19,7 @@ import networkx as nx
import numpy as np

from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.parallel import parallel_gm

sys.path.insert(0, "../")

@@ -101,10 +102,10 @@ def structuralspkernel(*args,
# get shortest path graphs of Gn
getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed'])
itr = zip(Gn, range(0, len(Gn)))
if len(Gn) < 1000 * n_jobs:
if len(Gn) < 100 * n_jobs:
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 1000
chunksize = 100
# chunksize = 300 # int(len(list(itr)) / n_jobs)
for i, sp in tqdm(
pool.imap_unordered(getsp_partial, itr, chunksize),
@@ -171,27 +172,53 @@ def structuralspkernel(*args,
# print(len(edge_w_g[0]))

Kmatrix = np.zeros((len(Gn), len(Gn)))
# ---- use pool.imap_unordered to parallel and track progress. ----
pool = Pool(n_jobs)
def init_worker(spl_toshare, gs_toshare):
global G_spl, G_gs
G_spl = spl_toshare
G_gs = gs_toshare
do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
node_kernels, edge_kernels)
itr = zip(combinations_with_replacement(Gn, 2),
combinations_with_replacement(splist, 2),
combinations_with_replacement(range(0, len(Gn)), 2))
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
if len_itr < 1000 * n_jobs:
chunksize = int(len_itr / n_jobs) + 1
else:
chunksize = 1000
for i, j, kernel in tqdm(
pool.imap_unordered(do_partial, itr, chunksize),
desc='calculating kernels',
file=sys.stdout):
Kmatrix[i][j] = kernel
Kmatrix[j][i] = kernel
pool.close()
pool.join()
node_kernels, edge_kernels)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(splist, Gn), n_jobs=n_jobs)

# # ---- use pool.imap_unordered to parallel and track progress. ----
# pool = Pool(n_jobs)
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
# if len_itr < 1000 * n_jobs:
# chunksize = int(len_itr / n_jobs) + 1
# else:
# chunksize = 1000
# for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, chunksize),
# desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()
# # ---- use pool.map to parallel. ----
# pool = Pool(n_jobs)
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# for i, j, kernel in tqdm(
# pool.map(do_partial, itr), desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()

# # ---- use pool.imap_unordered to parallel and track progress. ----
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
@@ -217,14 +244,12 @@ def structuralspkernel(*args,


# # ---- direct running, normally uses a single CPU core. ----
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# i, j, kernel = wrapper_ssp_do(ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels, gs)
# if(kernel > 1):
# print("error here ")
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# kernel = structuralspkernel_do(Gn[i], Gn[j], splist[i], splist[j],
# ds_attrs, node_label, edge_label, node_kernels, edge_kernels)
## if(kernel > 1):
## print("error here ")
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel

@@ -242,11 +267,11 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
kernel = 0

# First, compute shortest path matrices, method borrowed from FCSP.
vk_dict = {} # shortest path matrices dict
if ds_attrs['node_labeled']:
# node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
vk_dict = {} # shortest path matrices dict
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
@@ -255,7 +280,6 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
# node symb labeled
else:
kn = node_kernels['symb']
vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
@@ -264,23 +288,22 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
# node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else:
vk_dict = {}
pass

# Then, compute kernels between all pairs of edges, an idea extended
# from FCSP. It suits sparse graphs, which cover most cases we
# encountered. For dense graphs, it would be slow.
# encountered. For dense graphs, this would be slow.
ek_dict = {} # dict of edge kernels
if ds_attrs['edge_labeled']:
# edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
ek_dict = {} # dict of edge kernels
for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
@@ -292,7 +315,6 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
# edge symb labeled
else:
ke = edge_kernels['symb']
ek_dict = {}
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
@@ -304,7 +326,6 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
# edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
ek_dict = {}
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
@@ -314,7 +335,7 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
# edge unlabeled
else:
ek_dict = {}
pass

# compute graph kernels
if vk_dict:
@@ -393,15 +414,12 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,


def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels,
edge_kernels, itr_item):
g1 = itr_item[0][0]
g2 = itr_item[0][1]
spl1 = itr_item[1][0]
spl2 = itr_item[1][1]
i = itr_item[2][0]
j = itr_item[2][1]
return i, j, structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs,
node_label, edge_label, node_kernels, edge_kernels)
edge_kernels, itr):
i = itr[0]
j = itr[1]
return i, j, structuralspkernel_do(G_gs[i], G_gs[j], G_spl[i], G_spl[j],
ds_attrs, node_label, edge_label,
node_kernels, edge_kernels)


def get_shortest_paths(G, weight, directed):
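
The hunk above is the heart of this commit: instead of pickling the graph and path lists into every task, the big read-only objects are installed once per worker as globals through the Pool initializer. A self-contained sketch of the pattern, with toy data and illustrative names (pair_kernel is not part of the library):

import numpy as np
from functools import partial
from itertools import combinations_with_replacement
from multiprocessing import Pool

def init_worker(gs_toshare):
    # runs once in each worker; the big object is inherited or sent once
    global G_gs
    G_gs = gs_toshare

def pair_kernel(scale, ij):
    i, j = ij
    return i, j, scale * G_gs[i] * G_gs[j]  # read-only access, no per-task pickling

if __name__ == '__main__':
    data = np.arange(5.0)  # stand-in for a big read-only variable
    K = np.zeros((5, 5))
    itr = combinations_with_replacement(range(5), 2)
    with Pool(processes=2, initializer=init_worker, initargs=(data,)) as pool:
        for i, j, k in pool.imap_unordered(partial(pair_kernel, 2.0), itr):
            K[i][j] = K[j][i] = k
    print(K)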


+ 273
- 121
pygraph/kernels/untilHPathKernel.py View File

@@ -9,16 +9,17 @@ import sys
sys.path.insert(0, "../")
import time
from collections import Counter
from itertools import chain, combinations_with_replacement
from itertools import chain
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm

import networkx as nx
import numpy as np
from suffix_tree import Tree, ukkonen

from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.parallel import parallel_gm
from pygraph.utils.trie import Trie


def untilhpathkernel(*args,
@@ -46,7 +47,7 @@ def untilhpathkernel(*args,
A kernel function applied using different notions of fingerprint
similarity.
compute_method: string
Computation method, 'suffix_tree' or 'naive'.
Computation method, 'trie' or 'naive'.

Return
------
@@ -69,20 +70,24 @@ def untilhpathkernel(*args,
for G in Gn:
nx.set_edge_attributes(G, '0', 'bond_type')

start_time = time.time()
start_time = time.time()

# ---- use pool.imap_unordered to parallel and track progress. ----
# get all paths of all graphs before calculating kernels to save time,
# but this may cost a lot of memory for large datasets.
pool = Pool(n_jobs)
all_paths = [[] for _ in range(len(Gn))]
getps_partial = partial(wrapper_find_all_paths_until_length, depth,
ds_attrs, node_label, edge_label)
itr = zip(Gn, range(0, len(Gn)))
if len(Gn) < 1000 * n_jobs:
if len(Gn) < 100 * n_jobs:
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 1000
chunksize = 100
all_paths = [[] for _ in range(len(Gn))]
if compute_method == 'trie':
getps_partial = partial(wrapper_find_all_path_as_trie, depth,
ds_attrs, node_label, edge_label)
else:
getps_partial = partial(wrapper_find_all_paths_until_length, depth,
ds_attrs, node_label, edge_label)
for i, ps in tqdm(
pool.imap_unordered(getps_partial, itr, chunksize),
desc='getting paths', file=sys.stdout):
@@ -90,57 +95,55 @@ def untilhpathkernel(*args,
pool.close()
pool.join()
# size = sys.getsizeof(all_paths)
# for item in all_paths:
# size += sys.getsizeof(item)
# for pppps in item:
# size += sys.getsizeof(pppps)
# print(size)
# ttt = time.time()
# # ---- ---- use pool.map to parallel ----
# for i, ps in tqdm(
# pool.map(getps_partial, range(0, len(Gn))),
# desc='getting paths', file=sys.stdout):
# all_paths[i] = ps
# print(time.time() - ttt)
# for g in Gn:
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label)
if compute_method == 'suffix_tree':
pass
## size = sys.getsizeof(all_paths)
## for item in all_paths:
## size += sys.getsizeof(item)
## for pppps in item:
## size += sys.getsizeof(pppps)
## print(size)
#
## ttt = time.time()
## # ---- ---- use pool.map to parallel ----
## for i, ps in tqdm(
## pool.map(getps_partial, range(0, len(Gn))),
## desc='getting paths', file=sys.stdout):
## all_paths[i] = ps
## print(time.time() - ttt)
#
if compute_method == 'trie':
def init_worker(trie_toshare):
global G_trie
G_trie = trie_toshare
do_partial = partial(wrapper_uhpath_do_trie, k_func)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs)
else:
pool = Pool(n_jobs)
do_partial = partial(wrapper_uhpath_do_naive, k_func)
itr = zip(combinations_with_replacement(all_paths, 2),
combinations_with_replacement(range(0, len(Gn)), 2))
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
if len_itr < 1000 * n_jobs:
chunksize = int(len_itr / n_jobs) + 1
else:
chunksize = 1000
for i, j, kernel in tqdm(
pool.imap_unordered(do_partial, itr, chunksize),
desc='calculating kernels', file=sys.stdout):
Kmatrix[i][j] = kernel
Kmatrix[j][i] = kernel
pool.close()
pool.join()
# # ---- direct running, normally uses a single CPU core. ----
# all_paths = [
# find_all_paths_until_length(
# Gn[i],
# depth,
# ds_attrs,
# node_label=node_label,
# edge_label=edge_label) for i in tqdm(
# range(0, len(Gn)), desc='getting paths', file=sys.stdout)
# ]
#
# if compute_method == 'suffix_tree':
def init_worker(plist_toshare):
global G_plist
G_plist = plist_toshare
do_partial = partial(wrapper_uhpath_do_naive, k_func)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs)
#
#
## # ---- direct running, normally use single CPU core. ----
## all_paths = [
## find_all_paths_until_length(
## Gn[i],
## depth,
## ds_attrs,
## node_label=node_label,
## edge_label=edge_label) for i in tqdm(
## range(0, len(Gn)), desc='getting paths', file=sys.stdout)
## ]
##
# if compute_method == 'trie':
# # build generalized suffix tree of sets of paths for each graph.
# all_gstree = [paths2GSuffixTree(all_paths[i]) for i in tqdm(
# range(0, len(Gn)), desc='getting generalized suffix trees', file=sys.stdout)]
## all_gstree = [paths2GSuffixTree(all_paths[i]) for i in tqdm(
## range(0, len(Gn)), desc='getting generalized suffix trees', file=sys.stdout)]
#
# pbar = tqdm(
# total=((len(Gn) + 1) * len(Gn) / 2),
@@ -148,41 +151,38 @@ def untilhpathkernel(*args,
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# Kmatrix[i][j] = _untilhpathkernel_do_gst(all_gstree[i],
# all_gstree[j], all_paths[i], all_paths[j], k_func)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# else:
# pbar = tqdm(
# total=((len(Gn) + 1) * len(Gn) / 2),
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j],
# k_func)
# Kmatrix[i][j] = _untilhpathkernel_do_trie(all_paths[i],
# all_paths[j], k_func)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)

## else:
## pbar = tqdm(
## total=((len(Gn) + 1) * len(Gn) / 2),
## desc='calculating kernels',
## file=sys.stdout)
## for i in range(0, len(Gn)):
## for j in range(i, len(Gn)):
## Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j],
## k_func)
## Kmatrix[j][i] = Kmatrix[i][j]
## pbar.update(1)
#
run_time = time.time() - start_time
print(
"\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---"
% (depth, len(Gn), run_time))

# print(Kmatrix[0][0:10])
return Kmatrix, run_time


def _untilhpathkernel_do_gst(gst1, gst2, paths1, paths2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs using
generalized suffix tree.
def _untilhpathkernel_do_trie(trie1, trie2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs using trie.

Parameters
----------
paths1, paths2 : list
List of paths in 2 graphs, where for unlabeled graphs, each path is
represented by a list of nodes; while for labeled graphs, each path is
represented by a string consists of labels of nodes and/or edges on
that path.
trie1, trie2 : Trie
Tries that contain all paths of the 2 graphs.
k_func : function
A kernel function applied using different notions of fingerprint
similarity.
@@ -192,30 +192,105 @@ def _untilhpathkernel_do_gst(gst1, gst2, paths1, paths2, k_func):
kernel : float
Path kernel up to h between 2 graphs.
"""
all_paths = list(set(paths1 + paths2))

if k_func == 'tanimoto':
length_union = len(set(paths1 + paths2))
kernel = (len(set(paths1)) + len(set(paths2)) -
length_union) / length_union
# vector1 = [(1 if path in paths1 else 0) for path in all_paths]
# vector2 = [(1 if path in paths2 else 0) for path in all_paths]
# kernel_uv = np.dot(vector1, vector2)
# kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)

else: # MinMax kernel
path_count1 = Counter(paths1)
path_count2 = Counter(paths2)
vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
for key in all_paths]
vector2 = [(path_count2[key] if (key in path_count2.keys()) else 0)
for key in all_paths]
kernel = np.sum(np.minimum(vector1, vector2)) / \
np.sum(np.maximum(vector1, vector2))
if k_func == 'tanimoto':
# traverse all paths in graph1 and search them in graph2. Depth-first
# search is applied.
def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
for key, node in root['children'].items():
if node['isEndOfWord']:
pcurrent.append(key)
setlist[1] += 1
count2 = trie2.searchWord(pcurrent)
if count2 != 0:
setlist[0] += 1
if node['children'] != {}:
traverseTrie1t(node, trie2, setlist, pcurrent)
else:
del pcurrent[-1]
if pcurrent != []:
del pcurrent[-1]
# traverse all paths in graph2 and find out those that are not in
# graph1. Depth-first search is applied.
def traverseTrie2t(root, trie1, setlist, pcurrent=[]):
for key, node in root['children'].items():
if node['isEndOfWord']:
pcurrent.append(key)
# print(node['count'])
count1 = trie1.searchWord(pcurrent)
if count1 == 0:
setlist[1] += 1
if node['children'] != {}:
traverseTrie2t(node, trie1, setlist, pcurrent)
else:
del pcurrent[-1]
if pcurrent != []:
del pcurrent[-1]
setlist = [0, 0] # intersection and union of path sets of g1, g2.
# print(trie1.root)
# print(trie2.root)
traverseTrie1t(trie1.root, trie2, setlist)
# print(setlist)
traverseTrie2t(trie2.root, trie1, setlist)
# print(setlist)
kernel = setlist[0] / setlist[1]
else: # MinMax kernel
# traverse all paths in graph1 and search them in graph2. Depth-first
# search is applied.
def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
for key, node in root['children'].items():
if node['isEndOfWord']:
pcurrent.append(key)
# print(node['count'])
count1 = node['count']
count2 = trie2.searchWord(pcurrent)
sumlist[0] += min(count1, count2)
sumlist[1] += max(count1, count2)
if node['children'] != {}:
traverseTrie1m(node, trie2, sumlist, pcurrent)
else:
del pcurrent[-1]
if pcurrent != []:
del pcurrent[-1]
# traverse all paths in graph2 and find out those that are not in
# graph1. Depth-first search is applied.
def traverseTrie2m(root, trie1, sumlist, pcurrent=[]):
for key, node in root['children'].items():
if node['isEndOfWord']:
pcurrent.append(key)
# print(node['count'])
count1 = trie1.searchWord(pcurrent)
if count1 == 0:
sumlist[1] += node['count']
if node['children'] != {}:
traverseTrie2m(node, trie1, sumlist, pcurrent)
else:
del pcurrent[-1]
if pcurrent != []:
del pcurrent[-1]
sumlist = [0, 0] # sum of mins and sum of maxs
# print(trie1.root)
# print(trie2.root)
traverseTrie1m(trie1.root, trie2, sumlist)
# print(sumlist)
traverseTrie2m(trie2.root, trie1, sumlist)
# print(sumlist)
kernel = sumlist[0] / sumlist[1]

return kernel


def wrapper_uhpath_do_trie(k_func, itr):
i = itr[0]
j = itr[1]
return i, j, _untilhpathkernel_do_trie(G_trie[i], G_trie[j], k_func)

def _untilhpathkernel_do_naive(paths1, paths2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs naively.

@@ -259,12 +334,10 @@ def _untilhpathkernel_do_naive(paths1, paths2, k_func):
return kernel


def wrapper_uhpath_do_naive(k_func, itr_item):
plist1 = itr_item[0][0]
plist2 = itr_item[0][1]
i = itr_item[1][0]
j = itr_item[1][1]
return i, j, _untilhpathkernel_do_naive(plist1, plist2, k_func)
def wrapper_uhpath_do_naive(k_func, itr):
i = itr[0]
j = itr[1]
return i, j, _untilhpathkernel_do_naive(G_plist[i], G_plist[j], k_func)


# @todo: (can maybe be removed) this method finds paths repetitively; it could be faster.
@@ -332,6 +405,93 @@ def find_all_paths_until_length(G,
# all_paths.extend(new_paths)

# consider labels
return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)
def wrapper_find_all_paths_until_length(length, ds_attrs, node_label,
edge_label, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, find_all_paths_until_length(g, length, ds_attrs,
node_label=node_label, edge_label=edge_label)


def find_all_path_as_trie(G,
length,
ds_attrs,
node_label='atom',
edge_label='bond_type'):
# time1 = time.time()
# all_path = find_all_paths_until_length(G, length, ds_attrs,
# node_label=node_label,
# edge_label=edge_label)
# ptrie = Trie()
# for path in all_path:
# ptrie.insertWord(path)
# ptrie = Trie()
# path_l = [[n] for n in G.nodes] # paths of length l
# path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label)
# for p in path_l_str:
# ptrie.insertWord(p)
# for l in range(1, length + 1):
# path_lplus1 = []
# for path in path_l:
# for neighbor in G[path[-1]]:
# if neighbor not in path:
# tmp = path + [neighbor]
## if tmp[::-1] not in path_lplus1:
# path_lplus1.append(tmp)
# path_l = path_lplus1[:]
# # consider labels
# path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label)
# for p in path_l_str:
# ptrie.insertWord(p)
#
# print(time.time() - time1)
# print(ptrie.root)
# print()
# traverse all paths up to length h in a graph and construct a trie with
# them. Depth-first search is applied. Notice the reverse of each path is
# also stored in the trie.
def traverseGraph(root, ptrie, length, G, ds_attrs, node_label, edge_label,
pcurrent=[]):
if len(pcurrent) < length + 1:
for neighbor in G[root]:
if neighbor not in pcurrent:
pcurrent.append(neighbor)
plstr = paths2labelseqs([pcurrent], G, ds_attrs,
node_label, edge_label)
ptrie.insertWord(plstr[0])
traverseGraph(neighbor, ptrie, length, G, ds_attrs,
node_label, edge_label, pcurrent)
del pcurrent[-1]


ptrie = Trie()
path_l = [[n] for n in G.nodes] # paths of length l
path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label)
for p in path_l_str:
ptrie.insertWord(p)
for n in G.nodes:
traverseGraph(n, ptrie, length, G, ds_attrs, node_label, edge_label,
pcurrent=[n])
return ptrie


def wrapper_find_all_path_as_trie(length, ds_attrs, node_label,
edge_label, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, find_all_path_as_trie(g, length, ds_attrs,
node_label=node_label, edge_label=edge_label)


def paths2labelseqs(plist, G, ds_attrs, node_label, edge_label):
if ds_attrs['node_labeled']:
if ds_attrs['edge_labeled']:
path_strs = [
@@ -341,9 +501,8 @@ def find_all_paths_until_length(G,
(G.node[node][node_label],
G[node][path[idx + 1]][edge_label])
for idx, node in enumerate(path[:-1]))) +
[G.node[path[-1]][node_label]]) for path in all_paths
[G.node[path[-1]][node_label]]) for path in plist
]

# path_strs = []
# for path in all_paths:
# strlist = list(
@@ -355,7 +514,7 @@ def find_all_paths_until_length(G,
else:
path_strs = [
tuple([G.node[node][node_label] for node in path])
for path in all_paths
for path in plist
]
return path_strs
else:
@@ -364,22 +523,15 @@ def find_all_paths_until_length(G,
tuple([] if len(path) == 1 else [
G[node][path[idx + 1]][edge_label]
for idx, node in enumerate(path[:-1])
]) for path in all_paths
]) for path in plist
]
else:
return [tuple([len(path)]) for path in all_paths]
def wrapper_find_all_paths_until_length(length, ds_attrs, node_label,
edge_label, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, find_all_paths_until_length(g, length, ds_attrs,
node_label=node_label, edge_label=edge_label)
return [tuple(['0' for node in path]) for path in plist]
# return [tuple([len(path)]) for path in all_paths]

def paths2GSuffixTree(paths):
return Tree(paths, builder=ukkonen.Builder)
#
#def paths2GSuffixTree(paths):
# return Tree(paths, builder=ukkonen.Builder)


# def find_paths(G, source_node, length):
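
For reference, the two trie traversals added above compute the same quantities one would obtain from explicit path multisets. A small sketch with plain Counters, under the assumption that all paths fit in memory (the trie version exists precisely to avoid materializing them):

from collections import Counter

def minmax_kernel(paths1, paths2):
    # MinMax: sum of per-path min counts over sum of per-path max counts
    c1, c2 = Counter(paths1), Counter(paths2)
    keys = set(c1) | set(c2)
    return (sum(min(c1[k], c2[k]) for k in keys)
            / sum(max(c1[k], c2[k]) for k in keys))

def tanimoto_kernel(paths1, paths2):
    # Tanimoto: |intersection| / |union| of the path sets
    s1, s2 = set(paths1), set(paths2)
    return len(s1 & s2) / len(s1 | s2)

paths_a = [('C',), ('O',), ('C', '1', 'O')]
paths_b = [('C',), ('C',), ('C', '1', 'O')]
print(minmax_kernel(paths_a, paths_b))    # (1 + 0 + 1) / (2 + 1 + 1) = 0.5
print(tanimoto_kernel(paths_a, paths_b))  # 2 / 3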


+ 28
- 3
pygraph/utils/graphdataset.py View File

@@ -61,13 +61,26 @@ def get_dataset_attributes(Gn,
return nx.is_directed(Gn[0])

def get_ave_node_degree(Gn):
return np.mean([np.amax(list(dict(G.degree()).values())) for G in Gn])
return np.mean([np.mean(list(dict(G.degree()).values())) for G in Gn])

def get_max_node_degree(Gn):
return np.amax([np.amax(list(dict(G.degree()).values())) for G in Gn])
return np.amax([np.mean(list(dict(G.degree()).values())) for G in Gn])

def get_min_node_degree(Gn):
return np.amin([np.amax(list(dict(G.degree()).values())) for G in Gn])
return np.amin([np.mean(list(dict(G.degree()).values())) for G in Gn])
# get fill factor, the ratio of non-zero entries in the adjacency matrix.
def get_ave_fill_factor(Gn):
return np.mean([nx.number_of_edges(G) / (nx.number_of_nodes(G)
* nx.number_of_nodes(G)) for G in Gn])

def get_max_fill_factor(Gn):
return np.amax([nx.number_of_edges(G) / (nx.number_of_nodes(G)
* nx.number_of_nodes(G)) for G in Gn])

def get_min_fill_factor(Gn):
return np.amin([nx.number_of_edges(G) / (nx.number_of_nodes(G)
* nx.number_of_nodes(G)) for G in Gn])

def get_substructures(Gn):
subs = set()
@@ -137,6 +150,9 @@ def get_dataset_attributes(Gn,
'ave_node_degree',
'min_node_degree',
'max_node_degree',
'ave_fill_factor',
'min_fill_factor',
'max_fill_factor',
'node_label_num',
'edge_label_num',
'node_attr_dim',
@@ -219,6 +235,15 @@ def get_dataset_attributes(Gn,

if 'min_node_degree' in attr_names:
attrs.update({'min_node_degree': get_min_node_degree(Gn)})
if 'ave_fill_factor' in attr_names:
attrs.update({'ave_fill_factor': get_ave_fill_factor(Gn)})

if 'max_fill_factor' in attr_names:
attrs.update({'max_fill_factor': get_max_fill_factor(Gn)})

if 'min_fill_factor' in attr_names:
attrs.update({'min_fill_factor': get_min_fill_factor(Gn)})

if 'substructures' in attr_names:
attrs.update({'substructures': get_substructures(Gn)})
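
A quick check of what the corrected degree statistics and the new fill-factor sub-methods compute, on two toy graphs with hand-checkable values:

import networkx as nx
import numpy as np

Gn = [nx.path_graph(4), nx.complete_graph(4)]
ave_degrees = [np.mean(list(dict(G.degree()).values())) for G in Gn]
fill_factors = [nx.number_of_edges(G) / nx.number_of_nodes(G) ** 2 for G in Gn]
print(np.mean(ave_degrees))  # (1.5 + 3.0) / 2 = 2.25
print(fill_factors)          # [3/16 = 0.1875, 6/16 = 0.375]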


+ 5
- 9
pygraph/utils/kernels.py View File

@@ -26,27 +26,23 @@ def deltakernel(x, y):


def gaussiankernel(x, y, gamma=None):
"""Gaussian kernel. Use sklearn.metrics.pairwise.rbf_kernel instead.
Compute the rbf (gaussian) kernel between X and Y:
"""Gaussian kernel.
Compute the rbf (gaussian) kernel between x and y:

K(x, y) = exp(-gamma ||x-y||^2)

for each pair of rows x in X and y in Y.
K(x, y) = exp(-gamma ||x-y||^2).

Read more in the :ref:`User Guide <rbf_kernel>`.

Parameters
----------
X : array of shape (n_features)

Y : array of shape (n_features)
x, y : array

gamma : float, default None
If None, defaults to 1.0 / n_features

Returns
-------
kernel : integer
kernel : float
"""
if gamma is None:
gamma = 1.0 / len(x)
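
For reference, a standalone version consistent with the corrected docstring above (a sketch of the same formula, not necessarily the module's exact remaining code):

import numpy as np

def gaussiankernel(x, y, gamma=None):
    # K(x, y) = exp(-gamma * ||x - y||^2); gamma defaults to 1 / n_features
    if gamma is None:
        gamma = 1.0 / len(x)
    d = np.subtract(x, y)
    return np.exp(-gamma * np.dot(d, d))

print(gaussiankernel([1.0, 2.0], [1.0, 0.0]))  # exp(-0.5 * 4) ≈ 0.1353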


+ 204
- 49
pygraph/utils/model_selection_precomputed.py View File

@@ -8,7 +8,7 @@ from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import KFold, train_test_split, ParameterGrid

#from joblib import Parallel, delayed
from multiprocessing import Pool
from multiprocessing import Pool, Array
from functools import partial
import sys
sys.path.insert(0, "../")
@@ -19,7 +19,9 @@ import datetime
from pygraph.utils.graphfiles import loadDataset
from tqdm import tqdm

#from memory_profiler import profile

#@profile
def model_selection_for_precomputed_kernel(datafile,
estimator,
param_grid_precomputed,
@@ -91,8 +93,12 @@ def model_selection_for_precomputed_kernel(datafile,
# Load the dataset
print()
print('\n1. Loading dataset from file...')
dataset, y = loadDataset(
datafile, filename_y=datafile_y, extra_params=extra_params)
if isinstance(datafile, str):
dataset, y_all = loadDataset(
datafile, filename_y=datafile_y, extra_params=extra_params)
else: # load data directly from variable.
dataset = datafile
y_all = datafile_y

# import matplotlib.pyplot as plt
# import networkx as nx
@@ -117,8 +123,13 @@ def model_selection_for_precomputed_kernel(datafile,
tts = time.time() # start training time
nb_gm_ignore = 0 # the number of gram matrices that should not be considered, as they may contain elements that are not numbers (NaN)
for idx, params_out in enumerate(param_list_precomputed):
y = y_all[:]
params_out['n_jobs'] = n_jobs
rtn_data = estimator(dataset, **params_out)
# print(dataset)
# import networkx as nx
# nx.draw_networkx(dataset[1])
# plt.show()
rtn_data = estimator(dataset[:], **params_out)
Kmatrix = rtn_data[0]
current_run_time = rtn_data[1]
# for some kernels, some graphs in datasets may not meet the
@@ -126,6 +137,8 @@ def model_selection_for_precomputed_kernel(datafile,
if len(rtn_data) == 3:
idx_trim = rtn_data[2] # the index of trimmed graph list
y = [y[idxt] for idxt in idx_trim] # trim y accordingly
# Kmatrix = np.random.rand(2250, 2250)
# current_run_time = 0.1
Kmatrix_diag = Kmatrix.diagonal().copy()
# remove graphs whose kernels with themselves are zeros
@@ -146,7 +159,7 @@ def model_selection_for_precomputed_kernel(datafile,
print('the gram matrix is: ')
str_fw += 'the gram matrix is:\n\n'
else:
print('the gram matrix with parameters', params_out, 'is: ')
print('the gram matrix with parameters', params_out, 'is: \n\n')
str_fw += 'the gram matrix with parameters %s is:\n\n' % params_out
if len(Kmatrix) < 2:
nb_gm_ignore += 1
@@ -206,30 +219,52 @@ def model_selection_for_precomputed_kernel(datafile,
'3. Fitting and predicting using nested cross validation. This could really take a while...'
)
# pool = Pool(n_jobs)
# trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)
# ---- use pool.imap_unordered to parallel and track progress. ----
# train_pref = []
# val_pref = []
# test_pref = []
## if NUM_TRIALS < 1000 * n_jobs:
## chunksize = int(NUM_TRIALS / n_jobs) + 1
## else:
## chunksize = 1000
# chunksize = 1
# for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):
# train_pref.append(o1)
# val_pref.append(o2)
# test_pref.append(o3)
# pool.close()
# pool.join()
# def func_assign(result, var_to_assign):
# for idx, itm in enumerate(var_to_assign):
# itm.append(result[idx])
# trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)
#
# parallel_me(trial_do_partial, range(NUM_TRIALS), func_assign,
# [train_pref, val_pref, test_pref], glbv=gram_matrices,
# method='imap_unordered', n_jobs=n_jobs, chunksize=1,
# itr_desc='cross validation')
def init_worker(gms_toshare):
global G_gms
G_gms = gms_toshare
# gram_matrices = np.array(gram_matrices)
# gms_shape = gram_matrices.shape
# gms_array = Array('d', np.reshape(gram_matrices.copy(), -1, order='C'))
# pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gms_array, gms_shape))
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))
trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)
train_pref = []
val_pref = []
test_pref = []
# if NUM_TRIALS < 1000 * n_jobs:
# chunksize = int(NUM_TRIALS / n_jobs) + 1
# else:
# chunksize = 1000
chunksize = 1
for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):
train_pref.append(o1)
val_pref.append(o2)
test_pref.append(o3)
pool.close()
pool.join()
# ---- use pool.map to parallel. ----
pool = Pool(n_jobs)
trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)
result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))
train_pref = [item[0] for item in result_perf]
val_pref = [item[1] for item in result_perf]
test_pref = [item[2] for item in result_perf]
# # ---- use pool.map to parallel. ----
# pool = Pool(n_jobs)
# trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y[0:250], model_type)
# result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))
# train_pref = [item[0] for item in result_perf]
# val_pref = [item[1] for item in result_perf]
# test_pref = [item[2] for item in result_perf]
# # ---- direct running, normally uses a single CPU core. ----
# train_pref = []
@@ -422,6 +457,7 @@ def model_selection_for_precomputed_kernel(datafile,
str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n'
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed
gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices
param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones
y = gmfile['y'].tolist()
@@ -430,18 +466,18 @@ def model_selection_for_precomputed_kernel(datafile,
print(
'3. Fitting and predicting using nested cross validation. This could really take a while...'
)
pool = Pool(n_jobs)
trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)
# ---- use pool.imap_unordered to parallel and track progress. ----
def init_worker(gms_toshare):
global G_gms
G_gms = gms_toshare

pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))
trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)
train_pref = []
val_pref = []
test_pref = []
if NUM_TRIALS < 100:
chunksize, extra = divmod(NUM_TRIALS, n_jobs * 4)
if extra:
chunksize += 1
else:
chunksize = 100
chunksize = 1
for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):
train_pref.append(o1)
val_pref.append(o2)
@@ -536,22 +572,24 @@ def model_selection_for_precomputed_kernel(datafile,
str_fw += 'train_std: %s\n\n' % train_std

print()
average_gram_matrix_time = np.mean(gram_matrix_time)
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
best_gram_matrix_time = [
gram_matrix_time[i] for i in best_params_index[0]
]
ave_bgmt = np.mean(best_gram_matrix_time)
std_bgmt = np.std(best_gram_matrix_time, ddof=1)
print(
'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
.format(average_gram_matrix_time, std_gram_matrix_time))
print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(
ave_bgmt, std_bgmt))
tt_poster = time.time() - tts # training time with hyper-param choices that did not participate in calculation of gram matrices
# average_gram_matrix_time = np.mean(gram_matrix_time)
# std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
# best_gram_matrix_time = [
# gram_matrix_time[i] for i in best_params_index[0]
# ]
# ave_bgmt = np.mean(best_gram_matrix_time)
# std_bgmt = np.std(best_gram_matrix_time, ddof=1)
# print(
# 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
# .format(average_gram_matrix_time, std_gram_matrix_time))
# print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(
# ave_bgmt, std_bgmt))
print(
'training time with hyper-param choices that did not participate in calculation of gram matrices: {:.2f}s'.format(
tt_poster))
print('total training time with all hyper-param choices: {:.2f}s'.format(
tt_poster + np.sum(gram_matrix_time)))
# str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\n'.format(average_gram_matrix_time, std_gram_matrix_time)
# str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt)
str_fw += 'training time with hyper-param choices that did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster)
@@ -600,7 +638,7 @@ def model_selection_for_precomputed_kernel(datafile,
sorted(table_dict.items(),
key=lambda i: keyorder.index(i[0]))),
headers='keys')
print(tb_print)
# print(tb_print)
str_fw += 'table of performance v.s. hyper-params:\n\n%s\n\n' % tb_print

# open file to save all results for this dataset.
@@ -618,8 +656,11 @@ def model_selection_for_precomputed_kernel(datafile,
f.write(str_fw + '\n\n\n' + content)


def trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, trial): # Test set level
def trial_do(param_list_pre_revised, param_list, y, model_type, trial): # Test set level

# # get gram matrices from global variables.
# gram_matrices = np.reshape(G_gms.copy(), G_gms_shape, order='C')
# Arrays to store scores
train_pref = np.zeros((len(param_list_pre_revised), len(param_list)))
val_pref = np.zeros((len(param_list_pre_revised), len(param_list)))
@@ -635,6 +676,11 @@ def trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, t
# print()
# loop for each outer param tuple
for index_out, params_out in enumerate(param_list_pre_revised):
# get gram matrices from global variables.
# gm_now = G_gms[index_out * G_gms_shape[1] * G_gms_shape[2]:(index_out + 1) * G_gms_shape[1] * G_gms_shape[2]]
# gm_now = np.reshape(gm_now.copy(), (G_gms_shape[1], G_gms_shape[2]), order='C')
gm_now = G_gms[index_out].copy()
# split gram matrix and y to app and test sets.
indices = range(len(y))
# The argument "random_state" in function "train_test_split" can not be
@@ -652,7 +698,7 @@ def trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, t
rdm_seed_out = (rdm_seed_out + int(rdm_seed_out_l[index_out])) % (2 ** 32 - 1)
# print(trial, rdm_seed_out)
X_app, X_test, y_app, y_test, idx_app, idx_test = train_test_split(
gram_matrices[index_out], y, indices, test_size=0.1,
gm_now, y, indices, test_size=0.1,
random_state=rdm_seed_out, shuffle=True)
# print(trial, idx_app, idx_test)
# print()
@@ -775,3 +821,112 @@ def trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, t
# print('test_pref: ', test_pref)

return train_pref, val_pref, test_pref


def compute_gram_matrices(dataset, y, estimator, param_list_precomputed,
results_dir, ds_name,
n_jobs=1, str_fw='', verbose=True):
gram_matrices = [
] # a list to store gram matrices for all param_grid_precomputed
gram_matrix_time = [
] # a list to store time to calculate gram matrices
param_list_pre_revised = [
] # list to store param grids precomputed ignoring the useless ones
nb_gm_ignore = 0 # the number of gram matrices that should not be considered, as they may contain elements that are not numbers (NaN)
for idx, params_out in enumerate(param_list_precomputed):
params_out['n_jobs'] = n_jobs
# print(dataset)
# import networkx as nx
# nx.draw_networkx(dataset[1])
# plt.show()
rtn_data = estimator(dataset[:], **params_out)
Kmatrix = rtn_data[0]
current_run_time = rtn_data[1]
# for some kernels, some graphs in datasets may not meet the
# kernels' requirements for graph structure. These graphs are trimmed.
if len(rtn_data) == 3:
idx_trim = rtn_data[2] # the index of trimmed graph list
y = [y[idxt] for idxt in idx_trim] # trim y accordingly

Kmatrix_diag = Kmatrix.diagonal().copy()
# remove graphs whose kernels with themselves are zeros
nb_g_ignore = 0
for idxk, diag in enumerate(Kmatrix_diag):
if diag == 0:
Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=0)
Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=1)
nb_g_ignore += 1
# normalization
for i in range(len(Kmatrix)):
for j in range(i, len(Kmatrix)):
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
Kmatrix[j][i] = Kmatrix[i][j]

if verbose:
print()
if params_out == {}:
if verbose:
print('the gram matrix is: ')
str_fw += 'the gram matrix is:\n\n'
else:
if verbose:
print('the gram matrix with parameters', params_out, 'is: ')
str_fw += 'the gram matrix with parameters %s is:\n\n' % params_out
if len(Kmatrix) < 2:
nb_gm_ignore += 1
if verbose:
print('ignored, as at most one of its diagonal values is non-zero.')
str_fw += 'ignored, as at most one of its diagonal values is non-zero.\n\n'
else:
if np.isnan(Kmatrix).any(
): # if the matrix contains elements that are not numbers
nb_gm_ignore += 1
if verbose:
print('ignored, as it contains elements that are not numbers.')
str_fw += 'ignored, as it contains elements that are not numbers.\n\n'
else:
# print(Kmatrix)
str_fw += np.array2string(
Kmatrix,
separator=',') + '\n\n'
# separator=',',
# threshold=np.inf,
# floatmode='unique') + '\n\n'

fig_file_name = results_dir + '/GM[ds]' + ds_name
if params_out != {}:
fig_file_name += '[params]' + str(idx)
plt.imshow(Kmatrix)
plt.colorbar()
plt.savefig(fig_file_name + '.eps', format='eps', dpi=300)
# plt.show()
plt.clf()
gram_matrices.append(Kmatrix)
gram_matrix_time.append(current_run_time)
param_list_pre_revised.append(params_out)
if nb_g_ignore > 0:
if verbose:
print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore)
str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore
if verbose:
print()
print(
'{} gram matrices are calculated, {} of which are ignored.'.format(
len(param_list_precomputed), nb_gm_ignore))
str_fw += '{} gram matrices are calculated, {} of which are ignored.\n\n'.format(len(param_list_precomputed), nb_gm_ignore)
str_fw += 'serial numbers of gram matrix figures and their corresponding parameters settings:\n\n'
str_fw += ''.join([
'{}: {}\n'.format(idx, params_out)
for idx, params_out in enumerate(param_list_precomputed)
])
return gram_matrices, gram_matrix_time, param_list_pre_revised, y, str_fw
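
The normalization loop inside compute_gram_matrices is the standard cosine normalization of a gram matrix. A vectorized equivalent for reference, assuming no zero diagonal entries remain after trimming:

import numpy as np

def normalize_gram(K):
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d)  # K[i][j] / sqrt(K[i][i] * K[j][j])

K = np.array([[4.0, 2.0], [2.0, 9.0]])
print(normalize_gram(K))  # unit diagonal; off-diagonal 2 / (2 * 3)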


def read_gram_matrices_from_file(results_dir, ds_name):
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed
param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones
y = gmfile['y'].tolist()
return gram_matrices, param_list_pre_revised, y
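
read_gram_matrices_from_file assumes the .gm.npz layout saved elsewhere in this module. A round-trip sketch of that layout with toy data; note that newer NumPy versions require allow_pickle=True to load the object arrays:

import numpy as np

gms = np.array([np.eye(3), 2 * np.eye(3)])  # one gram matrix per param setting
gmtime = np.array([0.1, 0.2])               # time used to compute each matrix
params = np.array([{'h': 1}, {'h': 2}])     # object array of param dicts
y = np.array([0, 1, 0])
np.savez('Toy.gm.npz', gms=gms, gmtime=gmtime, params=params, y=y)

gmfile = np.load('Toy.gm.npz', allow_pickle=True)
print(gmfile['gms'].shape, gmfile['params'], gmfile['y'].tolist())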

+ 86
- 0
pygraph/utils/openblassettings.py View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 19 15:31:01 2018
A script to set the thread number of OpenBLAS (if used).
Some modules (such as NumPy, SciPy, scikit-learn) that use OpenBLAS perform
parallel computation automatically, which conflicts with other parallelizing
modules such as multiprocessing.Pool and greatly increases the computing time.
Setting the thread number to 1 forces OpenBLAS to use a single thread/CPU, so
the conflict is avoided.
e.g.:
with num_threads(8):
np.dot(x, y)
@author: ali_m
@Reference: ali_m, https://stackoverflow.com/a/29582987, 2018.12
"""

import contextlib
import ctypes
from ctypes.util import find_library
import os

# Prioritize hand-compiled OpenBLAS library over version in /usr/lib/
# from Ubuntu repos
try_paths = ['/opt/OpenBLAS/lib/libopenblas.so',
'/lib/libopenblas.so',
'/usr/lib/libopenblas.so.0',
find_library('openblas')]
openblas_lib = None
for libpath in try_paths:
try:
openblas_lib = ctypes.cdll.LoadLibrary(libpath)
break
except OSError:
continue
if openblas_lib is None:
raise EnvironmentError('Could not locate an OpenBLAS shared library', 2)


def set_num_threads(n):
"""Set the current number of threads used by the OpenBLAS server."""
openblas_lib.openblas_set_num_threads(int(n))


# At the time of writing these symbols were very new:
# https://github.com/xianyi/OpenBLAS/commit/65a847c
try:
openblas_lib.openblas_get_num_threads()
def get_num_threads():
"""Get the current number of threads used by the OpenBLAS server."""
return openblas_lib.openblas_get_num_threads()
except AttributeError:
def get_num_threads():
"""Dummy function (symbol not present in %s), returns -1."""
return -1
pass

try:
len(os.sched_getaffinity(0))
def get_num_procs():
"""Get the total number of physical processors"""
return len(os.sched_getaffinity(0))
except AttributeError:
def get_num_procs():
"""Dummy function (symbol not present), returns -1."""
return -1
pass


@contextlib.contextmanager
def num_threads(n):
"""Temporarily changes the number of OpenBLAS threads.

Example usage:

print("Before: {}".format(get_num_threads()))
with num_threads(n):
print("In thread context: {}".format(get_num_threads()))
print("After: {}".format(get_num_threads()))
"""
old_n = get_num_threads()
set_num_threads(n)
try:
yield
finally:
set_num_threads(old_n)
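
A hedged usage sketch for this module in the codebase's typical scenario, pinning OpenBLAS to one thread while multiprocessing provides the parallelism (assumes an OpenBLAS-backed NumPy; the module raises at import time if no OpenBLAS library is found):

import numpy as np
import pygraph.utils.openblassettings as obs

x = np.random.rand(1000, 1000)
print('before:', obs.get_num_threads())  # -1 if the symbol is unavailable
with obs.num_threads(1):                 # avoid oversubscription with Pool workers
    y = x @ x
print('after:', obs.get_num_threads())   # the previous value is restored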

+ 60
- 0
pygraph/utils/parallel.py View File

@@ -0,0 +1,60 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 11 11:39:46 2018
Parallel aid functions.
@author: ljia
"""
import multiprocessing
from multiprocessing import Pool
from tqdm import tqdm
import sys

def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker=None,
glbv=None, method=None, n_jobs=None, chunksize=None, itr_desc=''):
'''Run func over itr in parallel with multiprocessing, assigning each result
to var_to_assign through func_assign; glbv holds big read-only variables
shared with worker processes via init_worker.
'''
if method == 'imap_unordered':
if glbv: # global variables required.
# def init_worker(v_share):
# global G_var
# G_var = v_share
with Pool(processes=n_jobs, initializer=init_worker,
initargs=glbv) as pool:
if n_jobs is None:
n_jobs = multiprocessing.cpu_count()
if chunksize is None:
if len_itr < 100 * n_jobs:
chunksize = int(len_itr / n_jobs) + 1
else:
chunksize = 100
for result in tqdm(pool.imap_unordered(func, itr, chunksize),
desc=itr_desc, file=sys.stdout):
func_assign(result, var_to_assign)
else:
with Pool(processes=n_jobs) as pool:
if n_jobs is None:
n_jobs = multiprocessing.cpu_count()
if chunksize is None:
if len_itr < 100 * n_jobs:
chunksize = int(len_itr / n_jobs) + 1
else:
chunksize = 100
for result in tqdm(pool.imap_unordered(func, itr, chunksize),
desc=itr_desc, file=sys.stdout):
func_assign(result, var_to_assign)

def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None,
method='imap_unordered', n_jobs=None, chunksize=None):
from itertools import combinations_with_replacement
def func_assign(result, var_to_assign):
var_to_assign[result[0]][result[1]] = result[2]
var_to_assign[result[1]][result[0]] = result[2]
itr = combinations_with_replacement(range(0, len(Gn)), 2)
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
parallel_me(func, func_assign, Kmatrix, itr, len_itr=len_itr,
init_worker=init_worker, glbv=glbv, method=method, n_jobs=n_jobs,
chunksize=chunksize, itr_desc='calculating kernels')
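
parallel_gm ties the pieces above together. A minimal usage sketch with a trivial pairwise function; the toy data and wrapper name are illustrative, but the calling convention matches the kernels refactored in this commit:

import numpy as np
from functools import partial
from pygraph.utils.parallel import parallel_gm

def init_worker(gn_toshare):
    global G_gn
    G_gn = gn_toshare

def wrapper_toy_do(scale, itr):
    i, j = itr
    return i, j, scale * G_gn[i] * G_gn[j]

if __name__ == '__main__':
    Gn = [1.0, 2.0, 3.0]  # stand-ins for graphs
    Kmatrix = np.zeros((len(Gn), len(Gn)))
    do_partial = partial(wrapper_toy_do, 0.5)
    parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
                glbv=(Gn,), n_jobs=2)
    print(Kmatrix)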

+ 111
- 0
pygraph/utils/trie.py View File

@@ -0,0 +1,111 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 30 10:48:49 2019

Trie (prefix tree)
@author: ljia
@references:
https://viblo.asia/p/nlp-build-a-trie-data-structure-from-scratch-with-python-3P0lPzroKox, 2019.1
"""

import pickle
import json

""" Trie class
"""
class Trie:
# init Trie class
def __init__(self):
self.root = self.getNode()

def getNode(self):
return {"isEndOfWord": False, "children": {}}

def insertWord(self, word):
current = self.root
for ch in word:

if ch in current["children"]:
node = current["children"][ch]
else:
node = self.getNode()
current["children"][ch] = node

current = node
current["isEndOfWord"] = True
if 'count' in current:
current['count'] += 1
else:
current['count'] = 1

def searchWord(self, word):
current = self.root
for ch in word:
if ch not in current["children"]:
return 0
node = current["children"][ch]

current = node
if 'count' in current:
return current["count"]
else:
return 0

def searchWordPrefix(self, word):
current = self.root
for ch in word:
if not current["children"].has_key(ch):
return False
node = current["children"][ch]

current = node
# return True if children contain keys and values
return bool(current["children"])

def deleteWord(self, word):
self._delete(self.root, word, 0)

def _delete(self, current, word, index):
if(index == len(word)):
if not current["isEndOfWord"]:
return False
current["isEndOfWord"] = False
return len(current["children"].keys()) == 0

ch = word[index]
if not current["children"].has_key(ch):
return False
node = current["children"][ch]

should_delete_current_node = self._delete(node, word, index + 1)

if should_delete_current_node:
current["children"].pop(ch)
return len(current["children"].keys()) == 0

return False

def save_to_pickle(self, file_name):
f = open(file_name + ".pkl", "wb")
pickle.dump(self.root, f)
f.close()

def load_from_pickle(self, file_name):
f = open(file_name + ".pkl", "rb")
self.root = pickle.load(f)
f.close()

def to_json(self):
return json.dumps(self.root)

def save_to_json(self, file_name):
json_data = json.dumps(self.root)
f = open(file_name + ".json", "w")
f.write(json_data)
f.close()

def load_from_json(self, file_name):
json_file = open(file_name + ".json", "r")
self.root = json.load(json_file)
json_file.close()

+ 1
- 1
pygraph/utils/utils.py View File

@@ -1,5 +1,6 @@
import networkx as nx
import numpy as np
#from itertools import product

# from tqdm import tqdm

@@ -146,7 +147,6 @@ def direct_product(G1, G2, node_label, edge_label):
"""
# arrange all graphs in a list
from itertools import product

# G = G.to_directed()
gt = nx.DiGraph()
# add nodes

