2. fix two dimension bugs in model selection function when computing std.v0.1
@@ -138,7 +138,8 @@ | |||||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | ||||
" ds_name=ds['name'],\n", | " ds_name=ds['name'],\n", | ||||
" n_jobs=multiprocessing.cpu_count(),\n", | " n_jobs=multiprocessing.cpu_count(),\n", | ||||
" read_gm_from_file=False)\n", | |||||
" read_gm_from_file=False,\n", | |||||
" verbose=True)\n", | |||||
" print()" | " print()" | ||||
] | ] | ||||
} | } | ||||
@@ -82,5 +82,6 @@ for ds in dslist: | |||||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | ||||
ds_name=ds['name'], | ds_name=ds['name'], | ||||
n_jobs=multiprocessing.cpu_count(), | n_jobs=multiprocessing.cpu_count(), | ||||
read_gm_from_file=False) | |||||
read_gm_from_file=False, | |||||
verbose=True) | |||||
print() | print() |
@@ -170,7 +170,8 @@ | |||||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | ||||
" ds_name=ds['name'],\n", | " ds_name=ds['name'],\n", | ||||
" n_jobs=multiprocessing.cpu_count(),\n", | " n_jobs=multiprocessing.cpu_count(),\n", | ||||
" read_gm_from_file=False)\n", | |||||
" read_gm_from_file=False,\n", | |||||
" verbose=True)\n", | |||||
" print()" | " print()" | ||||
] | ] | ||||
} | } | ||||
@@ -81,5 +81,6 @@ for ds in dslist: | |||||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | ||||
ds_name=ds['name'], | ds_name=ds['name'], | ||||
n_jobs=multiprocessing.cpu_count(), | n_jobs=multiprocessing.cpu_count(), | ||||
read_gm_from_file=False) | |||||
read_gm_from_file=False, | |||||
verbose=True) | |||||
print() | print() |
@@ -23,14 +23,14 @@ | |||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5067.96it/s]\n" | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5308.25it/s]\n" | |||||
] | ] | ||||
}, | }, | ||||
{ | { | ||||
"name": "stderr", | "name": "stderr", | ||||
"output_type": "stream", | "output_type": "stream", | ||||
"text": [ | "text": [ | ||||
"../pygraph/kernels/randomWalkKernel.py:104: UserWarning: All labels are ignored.\n", | |||||
"../pygraph/kernels/randomWalkKernel.py:108: UserWarning: All labels are ignored.\n", | |||||
" warnings.warn('All labels are ignored.')\n" | " warnings.warn('All labels are ignored.')\n" | ||||
] | ] | ||||
}, | }, | ||||
@@ -38,132 +38,132 @@ | |||||
"name": "stdout", | "name": "stdout", | ||||
"output_type": "stream", | "output_type": "stream", | ||||
"text": [ | "text": [ | ||||
"calculating kernels: 16836it [00:00, 76328.32it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 65408.89it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.38956499099731445 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.4157981872558594 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.1, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.1, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5928.69it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 76159.65it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5205.09it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 73715.56it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.3680381774902344 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.36714887619018555 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.01, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.01, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5599.99it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 68122.63it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5344.96it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 68817.65it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.37236690521240234 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.3666379451751709 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.001, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.001, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5444.75it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 65554.92it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5295.73it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 74865.49it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.37699007987976074 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.36979222297668457 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.0001, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.0001, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 4451.65it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 64227.38it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5040.80it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 70923.54it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.40868353843688965 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.3692610263824463 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-05, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-05, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5455.55it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 69253.03it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5326.60it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 73697.55it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.4017300605773926 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.37317800521850586 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-06, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-06, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5691.60it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 67461.93it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5705.98it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 64238.65it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.3712270259857178 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.36565732955932617 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-07, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-07, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5229.98it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 70363.33it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 4833.15it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 69971.77it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.37551283836364746 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.37798523902893066 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-08, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-08, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5023.84it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 71457.21it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 4170.94it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 64187.38it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.39424848556518555 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.39433860778808594 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-09, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-09, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5359.14it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 73095.73it/s]\n", | |||||
"compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5273.43it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 69555.28it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of random walk kernel of size 183 built in 0.37821507453918457 seconds ---\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 0.3833920955657959 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-10, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-10, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"10 gram matrices are calculated, 0 of which are ignored.\n", | "10 gram matrices are calculated, 0 of which are ignored.\n", | ||||
"\n", | "\n", | ||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | "3. Fitting and predicting using nested cross validation. This could really take a while...\n", | ||||
"cross validation: 30it [00:33, 1.10s/it]\n", | |||||
"cross validation: 30it [00:33, 1.11s/it]\n", | |||||
"\n", | "\n", | ||||
"4. Getting final performance...\n", | "4. Getting final performance...\n", | ||||
"best_params_out: [{'compute_method': 'sylvester', 'weight': 0.01, 'n_jobs': 8}]\n", | |||||
"best_params_in: [{'alpha': 3.1622776601683795e-10}]\n", | |||||
"best_params_out: [{'compute_method': 'sylvester', 'weight': 0.01, 'n_jobs': 8, 'verbose': True}]\n", | |||||
"best_params_in: [{'alpha': 1e-10}]\n", | |||||
"\n", | "\n", | ||||
"best_val_perf: 31.894498817348637\n", | |||||
"best_val_std: 0.5235865101548381\n", | |||||
"final_performance: [32.04579601256704]\n", | |||||
"final_confidence: [3.9737533137212138]\n", | |||||
"train_performance: [30.830572265896325]\n", | |||||
"train_std: [0.44072573889937117]\n", | |||||
"best_val_perf: 31.76835551233969\n", | |||||
"best_val_std: 0.43269972907929183\n", | |||||
"final_performance: [32.391882524496765]\n", | |||||
"final_confidence: [2.6542337929023336]\n", | |||||
"train_performance: [30.70127313658435]\n", | |||||
"train_std: [0.31861204198126475]\n", | |||||
"\n", | "\n", | ||||
"time to calculate gram matrix with different hyper-params: 0.38±0.01s\n", | |||||
"time to calculate best gram matrix: 0.37±nans\n", | |||||
"total training time with all hyper-param choices: 40.27s\n", | |||||
"time to calculate gram matrix with different hyper-params: 0.38±0.02s\n", | |||||
"time to calculate best gram matrix: 0.37±0.00s\n", | |||||
"total training time with all hyper-param choices: 40.53s\n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"--- This is a regression problem ---\n", | "--- This is a regression problem ---\n", | ||||
@@ -175,24 +175,30 @@ | |||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"reindex vertices: 100%|██████████| 183/183 [00:00<00:00, 26008.32it/s]\n" | |||||
] | |||||
}, | |||||
{ | |||||
"name": "stderr", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||||
" keepdims=keepdims)\n", | |||||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||||
" ret = ret.dtype.type(ret / rcount)\n" | |||||
] | |||||
}, | |||||
{ | |||||
"name": "stdout", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"calculating kernels: 12501it [00:01, 6173.52it/s]" | |||||
"reindex vertices: 100%|██████████| 183/183 [00:00<00:00, 28950.24it/s]\n", | |||||
"calculating kernels: 16836it [00:02, 6540.43it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 2.6675093173980713 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'conjugate', 'edge_kernels': {'symb': <function deltakernel at 0x7fe9b0a2d730>, 'nsymb': <function gaussiankernel at 0x7fe9b0a2d7b8>, 'mix': functools.partial(<function kernelproduct at 0x7fe9b0a2d8c8>, <function deltakernel at 0x7fe9b0a2d730>, <function gaussiankernel at 0x7fe9b0a2d7b8>)}, 'node_kernels': {'symb': <function deltakernel at 0x7fe9b0a2d730>, 'nsymb': <function gaussiankernel at 0x7fe9b0a2d7b8>, 'mix': functools.partial(<function kernelproduct at 0x7fe9b0a2d8c8>, <function deltakernel at 0x7fe9b0a2d730>, <function gaussiankernel at 0x7fe9b0a2d7b8>)}, 'weight': 0.1, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"\n", | |||||
" None edge weight specified. Set all weight to 1.\n", | |||||
"\n", | |||||
"reindex vertices: 100%|██████████| 183/183 [00:00<00:00, 28019.19it/s]\n", | |||||
"calculating kernels: 16836it [00:02, 7963.48it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of random walk kernel of size 183 built in 2.2675061225891113 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'conjugate', 'edge_kernels': {'symb': <function deltakernel at 0x7fe9b0a2d730>, 'nsymb': <function gaussiankernel at 0x7fe9b0a2d7b8>, 'mix': functools.partial(<function kernelproduct at 0x7fe9b0a2d8c8>, <function deltakernel at 0x7fe9b0a2d730>, <function gaussiankernel at 0x7fe9b0a2d7b8>)}, 'node_kernels': {'symb': <function deltakernel at 0x7fe9b0a2d730>, 'nsymb': <function gaussiankernel at 0x7fe9b0a2d7b8>, 'mix': functools.partial(<function kernelproduct at 0x7fe9b0a2d8c8>, <function deltakernel at 0x7fe9b0a2d730>, <function gaussiankernel at 0x7fe9b0a2d7b8>)}, 'weight': 0.01, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"\n", | |||||
" None edge weight specified. Set all weight to 1.\n", | |||||
"\n", | |||||
"reindex vertices: 100%|██████████| 183/183 [00:00<00:00, 23036.63it/s]\n", | |||||
"calculating kernels: 12801it [00:01, 8043.11it/s]" | |||||
] | ] | ||||
} | } | ||||
], | ], | ||||
@@ -299,7 +305,8 @@ | |||||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | ||||
" ds_name=ds['name'],\n", | " ds_name=ds['name'],\n", | ||||
" n_jobs=multiprocessing.cpu_count(),\n", | " n_jobs=multiprocessing.cpu_count(),\n", | ||||
" read_gm_from_file=False)\n", | |||||
" read_gm_from_file=False,\n", | |||||
" verbose=True)\n", | |||||
" print()" | " print()" | ||||
] | ] | ||||
} | } | ||||
@@ -106,5 +106,6 @@ for ds in dslist: | |||||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | ||||
ds_name=ds['name'], | ds_name=ds['name'], | ||||
n_jobs=multiprocessing.cpu_count(), | n_jobs=multiprocessing.cpu_count(), | ||||
read_gm_from_file=False) | |||||
read_gm_from_file=False, | |||||
verbose=True) | |||||
print() | print() |
@@ -23,34 +23,34 @@ | |||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"getting sp graphs: 183it [00:00, 5345.48it/s]\n", | |||||
"calculating kernels: 16836it [00:01, 16066.90it/s]\n", | |||||
"getting sp graphs: 183it [00:00, 11704.68it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 17085.14it/s]\n", | |||||
"\n", | "\n", | ||||
" --- shortest path kernel matrix of size 183 built in 1.2855160236358643 seconds ---\n", | |||||
" --- shortest path kernel matrix of size 183 built in 1.2640743255615234 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7fe84734e598>, 'nsymb': <function gaussiankernel at 0x7fe84734e620>, 'mix': functools.partial(<function kernelproduct at 0x7fe84734e730>, <function deltakernel at 0x7fe84734e598>, <function gaussiankernel at 0x7fe84734e620>)}, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"1 gram matrices are calculated, 0 of which are ignored.\n", | "1 gram matrices are calculated, 0 of which are ignored.\n", | ||||
"\n", | "\n", | ||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | "3. Fitting and predicting using nested cross validation. This could really take a while...\n", | ||||
"cross validation: 30it [00:03, 8.63it/s]\n", | |||||
"cross validation: 30it [00:03, 8.84it/s]\n", | |||||
"\n", | "\n", | ||||
"4. Getting final performance...\n", | "4. Getting final performance...\n", | ||||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||||
"best_params_in: [{'alpha': 0.0001}]\n", | |||||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7fe84734e598>, 'nsymb': <function gaussiankernel at 0x7fe84734e620>, 'mix': functools.partial(<function kernelproduct at 0x7fe84734e730>, <function deltakernel at 0x7fe84734e598>, <function gaussiankernel at 0x7fe84734e620>)}, 'n_jobs': 8, 'verbose': True}]\n", | |||||
"best_params_in: [{'alpha': 1e-10}]\n", | |||||
"\n", | "\n", | ||||
"best_val_perf: 9.674788994813262\n", | |||||
"best_val_std: 0.6229031522274688\n", | |||||
"final_performance: [9.590999824754439]\n", | |||||
"final_confidence: [2.911796096257332]\n", | |||||
"train_performance: [6.16594412531739]\n", | |||||
"train_std: [0.2739093211154806]\n", | |||||
"best_val_perf: 9.687399048018559\n", | |||||
"best_val_std: 0.654180946161292\n", | |||||
"final_performance: [9.411656660355659]\n", | |||||
"final_confidence: [2.500437167823725]\n", | |||||
"train_performance: [6.168480355249007]\n", | |||||
"train_std: [0.2541557651056269]\n", | |||||
"\n", | "\n", | ||||
"time to calculate gram matrix with different hyper-params: 1.29±nans\n", | |||||
"time to calculate best gram matrix: 1.29±nans\n", | |||||
"total training time with all hyper-param choices: 5.15s\n", | |||||
"time to calculate gram matrix with different hyper-params: 1.26±0.00s\n", | |||||
"time to calculate best gram matrix: 1.26±0.00s\n", | |||||
"total training time with all hyper-param choices: 5.09s\n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
@@ -67,51 +67,35 @@ | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
" 1 graphs are removed as they don't contain edges.\n", | " 1 graphs are removed as they don't contain edges.\n", | ||||
"\n" | |||||
] | |||||
}, | |||||
{ | |||||
"name": "stderr", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||||
" keepdims=keepdims)\n", | |||||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||||
" ret = ret.dtype.type(ret / rcount)\n" | |||||
] | |||||
}, | |||||
{ | |||||
"name": "stdout", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"getting sp graphs: 149it [00:00, 6510.18it/s]\n", | |||||
"calculating kernels: 11175it [00:00, 18881.68it/s]\n", | |||||
"\n", | "\n", | ||||
" --- shortest path kernel matrix of size 149 built in 0.8007419109344482 seconds ---\n", | |||||
"getting sp graphs: 149it [00:00, 7096.72it/s]\n", | |||||
"calculating kernels: 11175it [00:00, 19504.73it/s]\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||||
" --- shortest path kernel matrix of size 149 built in 0.7957959175109863 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7fe84734e598>, 'nsymb': <function gaussiankernel at 0x7fe84734e620>, 'mix': functools.partial(<function kernelproduct at 0x7fe84734e730>, <function deltakernel at 0x7fe84734e598>, <function gaussiankernel at 0x7fe84734e620>)}, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"1 gram matrices are calculated, 0 of which are ignored.\n", | "1 gram matrices are calculated, 0 of which are ignored.\n", | ||||
"\n", | "\n", | ||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | "3. Fitting and predicting using nested cross validation. This could really take a while...\n", | ||||
"cross validation: 30it [00:02, 10.52it/s]\n", | |||||
"cross validation: 30it [00:02, 10.74it/s]\n", | |||||
"\n", | "\n", | ||||
"4. Getting final performance...\n", | "4. Getting final performance...\n", | ||||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||||
"best_params_in: [{'alpha': 3.162277660168379e-07}]\n", | |||||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7fe84734e598>, 'nsymb': <function gaussiankernel at 0x7fe84734e620>, 'mix': functools.partial(<function kernelproduct at 0x7fe84734e730>, <function deltakernel at 0x7fe84734e598>, <function gaussiankernel at 0x7fe84734e620>)}, 'n_jobs': 8, 'verbose': True}]\n", | |||||
"best_params_in: [{'alpha': 1e-05}]\n", | |||||
"\n", | "\n", | ||||
"best_val_perf: 8.784264102873752\n", | |||||
"best_val_std: 0.2656887278835053\n", | |||||
"final_performance: [8.059911355753659]\n", | |||||
"final_confidence: [1.9620843656589473]\n", | |||||
"train_performance: [7.8406202266920575]\n", | |||||
"train_std: [0.2177862360087283]\n", | |||||
"best_val_perf: 8.745832519261795\n", | |||||
"best_val_std: 0.26293501071192543\n", | |||||
"final_performance: [7.942686332248635]\n", | |||||
"final_confidence: [1.617779657027359]\n", | |||||
"train_performance: [7.860965083396337]\n", | |||||
"train_std: [0.16888913664254188]\n", | |||||
"\n", | "\n", | ||||
"time to calculate gram matrix with different hyper-params: 0.80±nans\n", | |||||
"time to calculate best gram matrix: 0.80±nans\n", | |||||
"total training time with all hyper-param choices: 4.02s\n", | |||||
"time to calculate gram matrix with different hyper-params: 0.80±0.00s\n", | |||||
"time to calculate best gram matrix: 0.80±0.00s\n", | |||||
"total training time with all hyper-param choices: 3.90s\n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
@@ -126,34 +110,34 @@ | |||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"getting sp graphs: 68it [00:00, 1095.77it/s]\n", | |||||
"calculating kernels: 2346it [00:02, 813.63it/s]\n", | |||||
"getting sp graphs: 68it [00:00, 2292.58it/s]\n", | |||||
"calculating kernels: 2346it [00:02, 873.39it/s]\n", | |||||
"\n", | "\n", | ||||
" --- shortest path kernel matrix of size 68 built in 3.110588550567627 seconds ---\n", | |||||
" --- shortest path kernel matrix of size 68 built in 2.986046075820923 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7fe84734e598>, 'nsymb': <function gaussiankernel at 0x7fe84734e620>, 'mix': functools.partial(<function kernelproduct at 0x7fe84734e730>, <function deltakernel at 0x7fe84734e598>, <function gaussiankernel at 0x7fe84734e620>)}, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"1 gram matrices are calculated, 0 of which are ignored.\n", | "1 gram matrices are calculated, 0 of which are ignored.\n", | ||||
"\n", | "\n", | ||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | "3. Fitting and predicting using nested cross validation. This could really take a while...\n", | ||||
"cross validation: 30it [00:02, 10.97it/s]\n", | |||||
"cross validation: 30it [00:02, 11.85it/s]\n", | |||||
"\n", | "\n", | ||||
"4. Getting final performance...\n", | "4. Getting final performance...\n", | ||||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7fe84734e598>, 'nsymb': <function gaussiankernel at 0x7fe84734e620>, 'mix': functools.partial(<function kernelproduct at 0x7fe84734e730>, <function deltakernel at 0x7fe84734e598>, <function gaussiankernel at 0x7fe84734e620>)}, 'n_jobs': 8, 'verbose': True}]\n", | |||||
"best_params_in: [{'C': 3162.2776601683795}]\n", | "best_params_in: [{'C': 3162.2776601683795}]\n", | ||||
"\n", | "\n", | ||||
"best_val_perf: 0.8798412698412699\n", | |||||
"best_val_std: 0.02062186442241262\n", | |||||
"final_performance: [0.9042857142857144]\n", | |||||
"final_confidence: [0.07343487734322982]\n", | |||||
"train_performance: [0.9709180695847363]\n", | |||||
"train_std: [0.005927396388634032]\n", | |||||
"best_val_perf: 0.8780158730158729\n", | |||||
"best_val_std: 0.028162670831398482\n", | |||||
"final_performance: [0.8714285714285714]\n", | |||||
"final_confidence: [0.09446318571439967]\n", | |||||
"train_performance: [0.9740729517396185]\n", | |||||
"train_std: [0.007872630412568218]\n", | |||||
"\n", | "\n", | ||||
"time to calculate gram matrix with different hyper-params: 3.11±nans\n", | |||||
"time to calculate best gram matrix: 3.11±nans\n", | |||||
"total training time with all hyper-param choices: 6.21s\n", | |||||
"time to calculate gram matrix with different hyper-params: 2.99±0.00s\n", | |||||
"time to calculate best gram matrix: 2.99±0.00s\n", | |||||
"total training time with all hyper-param choices: 5.93s\n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
@@ -168,19 +152,8 @@ | |||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"getting sp graphs: 94it [00:00, 2190.46it/s]\n", | |||||
"calculating kernels: 4465it [00:05, 763.81it/s]\n", | |||||
"\n", | |||||
" --- shortest path kernel matrix of size 94 built in 6.083932399749756 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"\n", | |||||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||||
"\n", | |||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||||
"cross validation: 0it [00:00, ?it/s]" | |||||
"getting sp graphs: 94it [00:00, 2131.93it/s]\n", | |||||
"calculating kernels: 1501it [00:01, 78.00it/s]" | |||||
] | ] | ||||
} | } | ||||
], | ], | ||||
@@ -264,7 +237,8 @@ | |||||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | ||||
" ds_name=ds['name'],\n", | " ds_name=ds['name'],\n", | ||||
" n_jobs=multiprocessing.cpu_count(),\n", | " n_jobs=multiprocessing.cpu_count(),\n", | ||||
" read_gm_from_file=False)\n", | |||||
" read_gm_from_file=False,\n", | |||||
" verbose=True)\n", | |||||
" print()" | " print()" | ||||
] | ] | ||||
} | } | ||||
@@ -77,5 +77,6 @@ for ds in dslist: | |||||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | ||||
ds_name=ds['name'], | ds_name=ds['name'], | ||||
n_jobs=multiprocessing.cpu_count(), | n_jobs=multiprocessing.cpu_count(), | ||||
read_gm_from_file=False) | |||||
read_gm_from_file=False, | |||||
verbose=True) | |||||
print() | print() |
@@ -23,34 +23,34 @@ | |||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"getting shortest paths: 183it [00:00, 5316.42it/s]\n", | |||||
"calculating kernels: 16836it [00:03, 4625.84it/s]\n", | |||||
"getting shortest paths: 183it [00:00, 5323.35it/s]\n", | |||||
"calculating kernels: 16836it [00:02, 5980.75it/s]\n", | |||||
"\n", | "\n", | ||||
" --- shortest path kernel matrix of size 183 built in 3.8611345291137695 seconds ---\n", | |||||
" --- shortest path kernel matrix of size 183 built in 3.0884954929351807 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7ff5ffc0c268>, 'nsymb': <function gaussiankernel at 0x7ff5ffc0c2f0>, 'mix': functools.partial(<function kernelproduct at 0x7ff5ffc0c400>, <function deltakernel at 0x7ff5ffc0c268>, <function gaussiankernel at 0x7ff5ffc0c2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7ff5ffc0c268>, 'nsymb': <function gaussiankernel at 0x7ff5ffc0c2f0>, 'mix': functools.partial(<function kernelproduct at 0x7ff5ffc0c400>, <function deltakernel at 0x7ff5ffc0c268>, <function gaussiankernel at 0x7ff5ffc0c2f0>)}, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"1 gram matrices are calculated, 0 of which are ignored.\n", | "1 gram matrices are calculated, 0 of which are ignored.\n", | ||||
"\n", | "\n", | ||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | "3. Fitting and predicting using nested cross validation. This could really take a while...\n", | ||||
"cross validation: 30it [00:03, 8.71it/s]\n", | |||||
"cross validation: 30it [00:03, 8.90it/s]\n", | |||||
"\n", | "\n", | ||||
"4. Getting final performance...\n", | "4. Getting final performance...\n", | ||||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||||
"best_params_in: [{'alpha': 0.0031622776601683794}]\n", | |||||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7ff5ffc0c268>, 'nsymb': <function gaussiankernel at 0x7ff5ffc0c2f0>, 'mix': functools.partial(<function kernelproduct at 0x7ff5ffc0c400>, <function deltakernel at 0x7ff5ffc0c268>, <function gaussiankernel at 0x7ff5ffc0c2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7ff5ffc0c268>, 'nsymb': <function gaussiankernel at 0x7ff5ffc0c2f0>, 'mix': functools.partial(<function kernelproduct at 0x7ff5ffc0c400>, <function deltakernel at 0x7ff5ffc0c268>, <function gaussiankernel at 0x7ff5ffc0c2f0>)}, 'n_jobs': 8, 'verbose': True}]\n", | |||||
"best_params_in: [{'alpha': 0.001}]\n", | |||||
"\n", | "\n", | ||||
"best_val_perf: 12.673707811197355\n", | |||||
"best_val_std: 0.8773195213759171\n", | |||||
"final_performance: [12.972668262063593]\n", | |||||
"final_confidence: [3.7642237202379087]\n", | |||||
"train_performance: [3.934708519599526]\n", | |||||
"train_std: [0.16225809646161615]\n", | |||||
"best_val_perf: 12.857015647214508\n", | |||||
"best_val_std: 0.8860388066269581\n", | |||||
"final_performance: [12.157314781928168]\n", | |||||
"final_confidence: [2.5739406086892296]\n", | |||||
"train_performance: [3.773093745028789]\n", | |||||
"train_std: [0.12430822644728814]\n", | |||||
"\n", | "\n", | ||||
"time to calculate gram matrix with different hyper-params: 3.86±nans\n", | |||||
"time to calculate best gram matrix: 3.86±nans\n", | |||||
"total training time with all hyper-param choices: 7.74s\n", | |||||
"time to calculate gram matrix with different hyper-params: 3.09±0.00s\n", | |||||
"time to calculate best gram matrix: 3.09±0.00s\n", | |||||
"total training time with all hyper-param choices: 6.84s\n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
@@ -64,51 +64,35 @@ | |||||
"2. Calculating gram matrices. This could take a while...\n", | "2. Calculating gram matrices. This could take a while...\n", | ||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n" | |||||
] | |||||
}, | |||||
{ | |||||
"name": "stderr", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||||
" keepdims=keepdims)\n", | |||||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||||
" ret = ret.dtype.type(ret / rcount)\n" | |||||
] | |||||
}, | |||||
{ | |||||
"name": "stdout", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"getting shortest paths: 150it [00:00, 8822.07it/s]\n", | |||||
"calculating kernels: 11325it [00:02, 5167.04it/s]\n", | |||||
"\n", | "\n", | ||||
" --- shortest path kernel matrix of size 150 built in 2.394453525543213 seconds ---\n", | |||||
"getting shortest paths: 150it [00:00, 5191.83it/s]\n", | |||||
"calculating kernels: 11325it [00:01, 7143.18it/s]\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||||
" --- shortest path kernel matrix of size 150 built in 1.7898523807525635 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7ff5ffc0c268>, 'nsymb': <function gaussiankernel at 0x7ff5ffc0c2f0>, 'mix': functools.partial(<function kernelproduct at 0x7ff5ffc0c400>, <function deltakernel at 0x7ff5ffc0c268>, <function gaussiankernel at 0x7ff5ffc0c2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7ff5ffc0c268>, 'nsymb': <function gaussiankernel at 0x7ff5ffc0c2f0>, 'mix': functools.partial(<function kernelproduct at 0x7ff5ffc0c400>, <function deltakernel at 0x7ff5ffc0c268>, <function gaussiankernel at 0x7ff5ffc0c2f0>)}, 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"1 gram matrices are calculated, 0 of which are ignored.\n", | "1 gram matrices are calculated, 0 of which are ignored.\n", | ||||
"\n", | "\n", | ||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | "3. Fitting and predicting using nested cross validation. This could really take a while...\n", | ||||
"cross validation: 30it [00:02, 10.78it/s]\n", | |||||
"cross validation: 30it [00:02, 10.59it/s]\n", | |||||
"\n", | "\n", | ||||
"4. Getting final performance...\n", | "4. Getting final performance...\n", | ||||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7ff5ffc0c268>, 'nsymb': <function gaussiankernel at 0x7ff5ffc0c2f0>, 'mix': functools.partial(<function kernelproduct at 0x7ff5ffc0c400>, <function deltakernel at 0x7ff5ffc0c268>, <function gaussiankernel at 0x7ff5ffc0c2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7ff5ffc0c268>, 'nsymb': <function gaussiankernel at 0x7ff5ffc0c2f0>, 'mix': functools.partial(<function kernelproduct at 0x7ff5ffc0c400>, <function deltakernel at 0x7ff5ffc0c268>, <function gaussiankernel at 0x7ff5ffc0c2f0>)}, 'n_jobs': 8, 'verbose': True}]\n", | |||||
"best_params_in: [{'alpha': 0.1}]\n", | "best_params_in: [{'alpha': 0.1}]\n", | ||||
"\n", | "\n", | ||||
"best_val_perf: 11.082918177885857\n", | |||||
"best_val_std: 0.3037589925734673\n", | |||||
"final_performance: [7.8261546009779925]\n", | |||||
"final_confidence: [1.59375970943081]\n", | |||||
"train_performance: [7.988630946761633]\n", | |||||
"train_std: [0.16054607648943253]\n", | |||||
"best_val_perf: 11.040598123045763\n", | |||||
"best_val_std: 0.31492017111536147\n", | |||||
"final_performance: [8.138193149138093]\n", | |||||
"final_confidence: [1.6238744767195439]\n", | |||||
"train_performance: [7.9412913127748235]\n", | |||||
"train_std: [0.18726339675217385]\n", | |||||
"\n", | "\n", | ||||
"time to calculate gram matrix with different hyper-params: 2.39±nans\n", | |||||
"time to calculate best gram matrix: 2.39±nans\n", | |||||
"total training time with all hyper-param choices: 5.49s\n", | |||||
"time to calculate gram matrix with different hyper-params: 1.79±0.00s\n", | |||||
"time to calculate best gram matrix: 1.79±0.00s\n", | |||||
"total training time with all hyper-param choices: 5.00s\n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
@@ -123,61 +107,8 @@ | |||||
"\n", | "\n", | ||||
" None edge weight specified. Set all weight to 1.\n", | " None edge weight specified. Set all weight to 1.\n", | ||||
"\n", | "\n", | ||||
"getting shortest paths: 68it [00:00, 567.53it/s]\n", | |||||
"calculating kernels: 2346it [00:14, 161.71it/s]\n", | |||||
"\n", | |||||
" --- shortest path kernel matrix of size 68 built in 14.833482265472412 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"\n", | |||||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||||
"\n", | |||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||||
"cross validation: 30it [00:02, 13.38it/s]\n", | |||||
"\n", | |||||
"4. Getting final performance...\n", | |||||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||||
"best_params_in: [{'C': 1000.0}]\n", | |||||
"\n", | |||||
"best_val_perf: 0.9084126984126983\n", | |||||
"best_val_std: 0.027912022159840448\n", | |||||
"final_performance: [0.9085714285714286]\n", | |||||
"final_confidence: [0.0879511091875412]\n", | |||||
"train_performance: [0.9679438832772166]\n", | |||||
"train_std: [0.00754192133247499]\n", | |||||
"\n", | |||||
"time to calculate gram matrix with different hyper-params: 14.83±nans\n", | |||||
"time to calculate best gram matrix: 14.83±nans\n", | |||||
"total training time with all hyper-param choices: 17.42s\n", | |||||
"\n", | |||||
"\n", | |||||
"\n", | |||||
"PAH\n", | |||||
"\n", | |||||
"--- This is a classification problem ---\n", | |||||
"\n", | |||||
"\n", | |||||
"1. Loading dataset from file...\n", | |||||
"\n", | |||||
"2. Calculating gram matrices. This could take a while...\n", | |||||
"\n", | |||||
" None edge weight specified. Set all weight to 1.\n", | |||||
"\n", | |||||
"getting shortest paths: 94it [00:00, 447.28it/s]\n", | |||||
"calculating kernels: 4465it [01:04, 68.94it/s] \n", | |||||
"\n", | |||||
" --- shortest path kernel matrix of size 94 built in 65.20552921295166 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"\n", | |||||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||||
"\n", | |||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||||
"cross validation: 0it [00:00, ?it/s]" | |||||
"getting shortest paths: 68it [00:00, 536.19it/s]\n", | |||||
"calculating kernels: 0it [00:00, ?it/s]" | |||||
] | ] | ||||
} | } | ||||
], | ], | ||||
@@ -260,7 +191,8 @@ | |||||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | ||||
" ds_name=ds['name'],\n", | " ds_name=ds['name'],\n", | ||||
" n_jobs=multiprocessing.cpu_count(),\n", | " n_jobs=multiprocessing.cpu_count(),\n", | ||||
" read_gm_from_file=False)\n", | |||||
" read_gm_from_file=False,\n", | |||||
" verbose=True)\n", | |||||
" print()" | " print()" | ||||
] | ] | ||||
} | } | ||||
@@ -84,5 +84,6 @@ for ds in dslist: | |||||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | ||||
ds_name=ds['name'], | ds_name=ds['name'], | ||||
n_jobs=multiprocessing.cpu_count(), | n_jobs=multiprocessing.cpu_count(), | ||||
read_gm_from_file=False) | |||||
read_gm_from_file=False, | |||||
verbose=True) | |||||
print() | print() |
@@ -20,203 +20,187 @@ | |||||
"1. Loading dataset from file...\n", | "1. Loading dataset from file...\n", | ||||
"\n", | "\n", | ||||
"2. Calculating gram matrices. This could take a while...\n", | "2. Calculating gram matrices. This could take a while...\n", | ||||
"getting paths: 183it [00:00, 33583.79it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 382919.33it/s]\n", | |||||
"getting paths: 183it [00:00, 22697.39it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 371524.56it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 1 of size 183 built in 0.28138017654418945 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 1 of size 183 built in 0.27962422370910645 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 49932.19it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 339040.24it/s]\n", | |||||
"getting paths: 183it [00:00, 35988.26it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 444708.75it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 1 of size 183 built in 0.2915959358215332 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 1 of size 183 built in 0.284440279006958 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 13100.71it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 195915.25it/s]\n", | |||||
"getting paths: 183it [00:00, 26474.81it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 215084.65it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 2 of size 183 built in 0.39291882514953613 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 2 of size 183 built in 0.2832369804382324 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 15186.23it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 216679.82it/s]\n", | |||||
"getting paths: 183it [00:00, 18360.43it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 254309.18it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 2 of size 183 built in 0.2922053337097168 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 2 of size 183 built in 0.28844165802001953 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 8410.48it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 146690.73it/s]\n", | |||||
"getting paths: 183it [00:00, 8687.30it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 168741.96it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 3 of size 183 built in 0.3915746212005615 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 3 of size 183 built in 0.38907885551452637 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 13951.28it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 201673.88it/s]\n", | |||||
"getting paths: 183it [00:00, 11379.65it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 195770.23it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 3 of size 183 built in 0.3854410648345947 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 3 of size 183 built in 0.39213061332702637 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 10054.46it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 70713.10it/s]\n", | |||||
"getting paths: 183it [00:00, 8062.50it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 72349.59it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 4 of size 183 built in 0.48105573654174805 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 4 of size 183 built in 0.512467622756958 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 1418.94it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 144898.57it/s]\n", | |||||
"getting paths: 183it [00:00, 10578.68it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 133704.13it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 4 of size 183 built in 0.5477819442749023 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 4 of size 183 built in 0.3866546154022217 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 15604.25it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 103300.82it/s]\n", | |||||
"getting paths: 183it [00:00, 9220.91it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 98386.86it/s] \n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 5 of size 183 built in 0.3788299560546875 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 5 of size 183 built in 0.38112974166870117 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 9795.27it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 121689.68it/s]\n", | |||||
"getting paths: 183it [00:00, 8493.03it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 119698.11it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 5 of size 183 built in 0.3888108730316162 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 5 of size 183 built in 0.38007307052612305 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 7163.19it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 89262.17it/s]\n", | |||||
"getting paths: 183it [00:00, 7385.55it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 88347.09it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 6 of size 183 built in 0.39624905586242676 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 6 of size 183 built in 0.3929023742675781 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 16751.59it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 100004.39it/s]\n", | |||||
"getting paths: 183it [00:00, 5394.24it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 100946.78it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 6 of size 183 built in 0.388913631439209 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 6 of size 183 built in 0.3824801445007324 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 10090.81it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 91172.28it/s] \n", | |||||
"getting paths: 183it [00:00, 12457.52it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 68995.02it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 7 of size 183 built in 0.4908461570739746 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 7 of size 183 built in 0.49313783645629883 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 2997.78it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 104945.65it/s]\n", | |||||
"getting paths: 183it [00:00, 2829.00it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 105515.66it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 7 of size 183 built in 0.36611366271972656 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 7 of size 183 built in 0.35750555992126465 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 6353.90it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 80425.25it/s]\n", | |||||
"getting paths: 183it [00:00, 7427.43it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 81607.79it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 8 of size 183 built in 0.5061323642730713 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 8 of size 183 built in 0.4937615394592285 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 9427.60it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 93863.88it/s] \n", | |||||
"getting paths: 183it [00:00, 5660.08it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 90014.85it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 8 of size 183 built in 0.3872077465057373 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 8 of size 183 built in 0.36504673957824707 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 7575.20it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 82517.07it/s]\n", | |||||
"getting paths: 183it [00:00, 7548.83it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 79498.55it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 9 of size 183 built in 0.48129963874816895 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 9 of size 183 built in 0.47993040084838867 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 6563.74it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 94045.02it/s] \n", | |||||
"getting paths: 183it [00:00, 7319.90it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 92310.24it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 9 of size 183 built in 0.39592933654785156 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 9 of size 183 built in 0.3970515727996826 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 6069.81it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 77447.83it/s]\n", | |||||
"getting paths: 183it [00:00, 8318.60it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 89934.38it/s] \n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 10 of size 183 built in 0.47420382499694824 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 10 of size 183 built in 0.4861469268798828 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 183it [00:00, 9481.17it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 58253.60it/s]\n", | |||||
"getting paths: 183it [00:00, 2635.72it/s]\n", | |||||
"calculating kernels: 16836it [00:00, 90123.30it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 10 of size 183 built in 0.4869115352630615 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 10 of size 183 built in 0.367603063583374 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"20 gram matrices are calculated, 0 of which are ignored.\n", | "20 gram matrices are calculated, 0 of which are ignored.\n", | ||||
"\n", | "\n", | ||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | "3. Fitting and predicting using nested cross validation. This could really take a while...\n", | ||||
"cross validation: 30it [01:07, 1.10s/it]\n", | |||||
"cross validation: 30it [01:06, 1.11s/it]\n", | |||||
"\n", | "\n", | ||||
"4. Getting final performance...\n", | "4. Getting final performance...\n", | ||||
"best_params_out: [{'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8}]\n", | |||||
"best_params_out: [{'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True}]\n", | |||||
"best_params_in: [{'alpha': 0.01}]\n", | "best_params_in: [{'alpha': 0.01}]\n", | ||||
"\n", | "\n", | ||||
"best_val_perf: 6.8347760734601675\n", | |||||
"best_val_std: 0.26187601855914455\n", | |||||
"final_performance: [6.844597847292873]\n", | |||||
"final_confidence: [1.3282917788841784]\n", | |||||
"train_performance: [2.2886614412566524]\n", | |||||
"train_std: [0.11697823620293107]\n", | |||||
"best_val_perf: 6.842702754673377\n", | |||||
"best_val_std: 0.3600238142615252\n", | |||||
"final_performance: [7.557191252340816]\n", | |||||
"final_confidence: [2.5849069582911595]\n", | |||||
"train_performance: [2.276370048287339]\n", | |||||
"train_std: [0.13830866732067562]\n", | |||||
"\n", | |||||
"time to calculate gram matrix with different hyper-params: 0.39±0.07s\n", | |||||
"time to calculate best gram matrix: 0.28±0.00s\n", | |||||
"total training time with all hyper-param choices: 79.82s\n", | |||||
"\n", | "\n", | ||||
"time to calculate gram matrix with different hyper-params: 0.41±0.07s\n", | |||||
"time to calculate best gram matrix: 0.39±nans\n", | |||||
"total training time with all hyper-param choices: 82.00s\n", | |||||
"\n" | |||||
] | |||||
}, | |||||
{ | |||||
"name": "stderr", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||||
" keepdims=keepdims)\n", | |||||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||||
" ret = ret.dtype.type(ret / rcount)\n" | |||||
] | |||||
}, | |||||
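An aside on the stderr block just above, which only appears in the old output: a sample standard deviation taken over a single value has zero degrees of freedom, so numpy warns and returns nan, which is presumably how an output such as "0.39±nans" arises. A minimal reproduction, given as an illustration rather than code from this patch:

    import numpy as np

    times = np.array([0.39])        # only one timing sample available
    print(np.std(times, ddof=1))    # nan, with the two RuntimeWarnings shown above
    print(np.std(times, ddof=0))    # 0.0, the population standard deviation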
{ | |||||
"name": "stdout", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"Alkane\n", | "Alkane\n", | ||||
@@ -227,171 +211,44 @@ | |||||
"1. Loading dataset from file...\n", | "1. Loading dataset from file...\n", | ||||
"\n", | "\n", | ||||
"2. Calculating gram matrices. This could take a while...\n", | "2. Calculating gram matrices. This could take a while...\n", | ||||
"getting paths: 150it [00:00, 38060.83it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 447307.64it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 1 of size 150 built in 0.29852986335754395 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 16860.39it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 522115.40it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 1 of size 150 built in 0.29816317558288574 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 18149.30it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 383173.55it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 2 of size 150 built in 0.29796385765075684 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 23172.10it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 427074.37it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 2 of size 150 built in 0.3109288215637207 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 13243.78it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 269283.28it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 3 of size 150 built in 0.29997825622558594 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 20671.78it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 312080.29it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 3 of size 150 built in 0.29572534561157227 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 15393.45it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 362928.87it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 4 of size 150 built in 0.30132484436035156 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 16957.65it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 84503.61it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 4 of size 150 built in 0.4178507328033447 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 14440.54it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 276894.92it/s]\n", | |||||
"getting paths: 150it [00:00, 31366.32it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 509820.58it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 5 of size 150 built in 0.29338693618774414 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 1 of size 150 built in 0.29791831970214844 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 150it [00:00, 15619.30it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 254676.58it/s]\n", | |||||
"getting paths: 150it [00:00, 30330.50it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 655613.27it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 5 of size 150 built in 0.29663729667663574 seconds ---\n", | |||||
" --- kernel matrix of path kernel up to 1 of size 150 built in 0.29232001304626465 seconds ---\n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"\n", | "\n", | ||||
"getting paths: 150it [00:00, 14585.84it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 242964.30it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 6 of size 150 built in 0.29677391052246094 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 11555.83it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 330949.31it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 6 of size 150 built in 0.2948622703552246 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 10424.60it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 238514.96it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 7 of size 150 built in 0.3041496276855469 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 12318.80it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 251979.97it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 7 of size 150 built in 0.3013496398925781 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 10722.00it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 232363.74it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 8 of size 150 built in 0.294144868850708 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 18965.59it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 424638.55it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 8 of size 150 built in 0.2961091995239258 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 13945.38it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 286344.19it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 9 of size 150 built in 0.30029296875 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 9525.87it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 231776.43it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 9 of size 150 built in 0.29835057258605957 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 16916.15it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 85396.60it/s]\n", | |||||
"\n", | |||||
" --- kernel matrix of path kernel up to 10 of size 150 built in 0.42621588706970215 seconds ---\n", | |||||
"\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", | |||||
"\n", | |||||
"getting paths: 150it [00:00, 27568.71it/s]\n" | |||||
] | |||||
}, | |||||
{ | |||||
"name": "stdout", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"calculating kernels: 11325it [00:00, 780628.98it/s]\n", | |||||
"\n", | "\n", | ||||
"getting paths: 150it [00:00, 16108.40it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 328896.12it/s]\n", | |||||
" --- kernel matrix of path kernel up to 2 of size 150 built in 0.2590019702911377 seconds ---\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 10 of size 150 built in 0.301084041595459 seconds ---\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", | |||||
"\n", | "\n", | ||||
"getting paths: 150it [00:00, 17554.29it/s]\n", | |||||
"calculating kernels: 11325it [00:00, 320784.55it/s]\n", | |||||
"\n", | "\n", | ||||
" --- kernel matrix of path kernel up to 2 of size 150 built in 0.3091611862182617 seconds ---\n", | |||||
"\n", | "\n", | ||||
"20 gram matrices are calculated, 0 of which are ignored.\n", | |||||
"the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", | |||||
"\n", | "\n", | ||||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||||
"cross validation: 0it [00:00, ?it/s]" | |||||
"\n" | |||||
] | ] | ||||
} | } | ||||
], | ], | ||||
@@ -472,7 +329,8 @@ | |||||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | ||||
" ds_name=ds['name'],\n", | " ds_name=ds['name'],\n", | ||||
" n_jobs=multiprocessing.cpu_count(),\n", | " n_jobs=multiprocessing.cpu_count(),\n", | ||||
" read_gm_from_file=False)\n", | |||||
" read_gm_from_file=False,\n", | |||||
" verbose=True)\n", | |||||
" print()" | " print()" | ||||
] | ] | ||||
}, | }, | ||||
@@ -80,5 +80,6 @@ for ds in dslist: | |||||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | ||||
ds_name=ds['name'], | ds_name=ds['name'], | ||||
n_jobs=multiprocessing.cpu_count(), | n_jobs=multiprocessing.cpu_count(), | ||||
read_gm_from_file=False) | |||||
read_gm_from_file=False, | |||||
verbose=True) | |||||
print() | print() |
@@ -6,23 +6,196 @@ Created on Wed Mar 20 10:12:15 2019 | |||||
inferring a graph from path frequency. | inferring a graph from path frequency. | ||||
@author: ljia | @author: ljia | ||||
""" | """ | ||||
#import numpy as np | |||||
import networkx as nx | |||||
from scipy.spatial.distance import hamming | |||||
import itertools | |||||
def SISF(K, v): | def SISF(K, v): | ||||
if output: | if output: | ||||
return output | return output | ||||
else: | else: | ||||
return 'no solution' | return 'no solution' | ||||
def SISF_M(K, v): | def SISF_M(K, v): | ||||
return output | return output | ||||
def GIPF_tree(K, v): | |||||
def GIPF_tree(v_obj, K=1, alphabet=[0, 1]): | |||||
if K == 1: | if K == 1: | ||||
pass | |||||
if G: | |||||
return G | |||||
else: | |||||
return 'no solution' | |||||
n_graph = v_obj[0] + v_obj[1] | |||||
D_T, father_idx = getDynamicTable(n_graph, alphabet) | |||||
# get the vector closest to v_obj. | |||||
if v_obj not in D_T: | |||||
print('no exact solution') | |||||
dis_lim = 1 / len(v_obj) # the shortest possible distance. | |||||
dis_min = 1.0 # minimum proportional distance | |||||
v_min = v_obj | |||||
for vc in D_T: | |||||
if vc[0] + vc[1] == n_graph: | |||||
# print(vc) | |||||
dis = hamming(vc, v_obj) | |||||
if dis < dis_min: | |||||
dis_min = dis | |||||
v_min = vc | |||||
if dis_min <= dis_lim: | |||||
break | |||||
v_obj = v_min | |||||
# obtain required graph by traceback procedure. | |||||
return getObjectGraph(v_obj, D_T, father_idx, alphabet), v_obj | |||||
def GIPF_M(K, v): | def GIPF_M(K, v): | ||||
return G | |||||
return G | |||||
def getDynamicTable(n_graph, alphabet=[0, 1]): | |||||
# init. When only one node exists. | |||||
D_T = {(1, 0, 0, 0, 0, 0): 1, (0, 1, 0, 0, 0, 0): 1, (0, 0, 1, 0, 0, 0): 0, | |||||
(0, 0, 0, 1, 0, 0): 0, (0, 0, 0, 0, 1, 0): 0, (0, 0, 0, 0, 0, 1): 0,} | |||||
D_T = [(1, 0, 0, 0, 0, 0), (0, 1, 0, 0, 0, 0)] | |||||
father_idx = [-1, -1] # index of each vector's father | |||||
# add possible vectors. | |||||
for idx, v in enumerate(D_T): | |||||
if v[0] + v[1] < n_graph: | |||||
D_T.append((v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5])) | |||||
D_T.append((v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5])) | |||||
D_T.append((v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5])) | |||||
D_T.append((v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2)) | |||||
father_idx += [idx, idx, idx, idx] | |||||
# D_T = itertools.chain([(1, 0, 0, 0, 0, 0)], [(0, 1, 0, 0, 0, 0)]) | |||||
# father_idx = itertools.chain([-1], [-1]) # index of each vector's father | |||||
# # add possible vectors. | |||||
# for idx, v in enumerate(D_T): | |||||
# if v[0] + v[1] < n_graph: | |||||
# D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5])]) | |||||
# D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5])]) | |||||
# D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5])]) | |||||
# D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2)]) | |||||
# father_idx = itertools.chain(father_idx, [idx, idx, idx, idx]) | |||||
return D_T, father_idx | |||||
def getObjectGraph(v_obj, D_T, father_idx, alphabet=[0, 1]): | |||||
g_obj = nx.Graph() | |||||
# do vector traceback. | |||||
v_tb = [list(v_obj)] # traceback vectors. | |||||
v_tb_idx = [D_T.index(v_obj)] # indices of traceback vectors. | |||||
while v_tb_idx[-1] > 1: | |||||
idx_pre = father_idx[v_tb_idx[-1]] | |||||
v_tb_idx.append(idx_pre) | |||||
v_tb.append(list(D_T[idx_pre])) | |||||
v_tb = v_tb[::-1] # reverse | |||||
# v_tb_idx = v_tb_idx[::-1] | |||||
# construct tree. | |||||
v_c = v_tb[0] # current vector. | |||||
if v_c[0] == 1: | |||||
g_obj.add_node(0, node_label=alphabet[0]) | |||||
else: | |||||
g_obj.add_node(0, node_label=alphabet[1]) | |||||
for vct in v_tb[1:]: | |||||
if vct[0] - v_c[0] == 1: | |||||
if vct[2] - v_c[2] == 2: # transfer 1 | |||||
label1 = alphabet[0] | |||||
label2 = alphabet[0] | |||||
else: # transfer 2 | |||||
label1 = alphabet[1] | |||||
label2 = alphabet[0] | |||||
else: | |||||
if vct[3] - v_c[3] == 1: # transfer 3 | |||||
label1 = alphabet[0] | |||||
label2 = alphabet[1] | |||||
else: # transfer 4 | |||||
label1 = alphabet[1] | |||||
label2 = alphabet[1] | |||||
for nd, attr in g_obj.nodes(data=True): | |||||
if attr['node_label'] == label1: | |||||
nb_node = nx.number_of_nodes(g_obj) | |||||
g_obj.add_node(nb_node, node_label=label2) | |||||
g_obj.add_edge(nd, nb_node) | |||||
break | |||||
v_c = vct | |||||
return g_obj | |||||
import random | |||||
def hierarchy_pos(G, root=None, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5): | |||||
''' | |||||
From Joel's answer at https://stackoverflow.com/a/29597209/2966723. | |||||
Licensed under Creative Commons Attribution-Share Alike | |||||
If the graph is a tree this will return the positions to plot this in a | |||||
hierarchical layout. | |||||
G: the graph (must be a tree) | |||||
root: the root node of current branch | |||||
- if the tree is directed and this is not given, | |||||
the root will be found and used | |||||
- if the tree is directed and this is given, then | |||||
the positions will be just for the descendants of this node. | |||||
- if the tree is undirected and not given, | |||||
then a random choice will be used. | |||||
width: horizontal space allocated for this branch - avoids overlap with other branches | |||||
vert_gap: gap between levels of hierarchy | |||||
vert_loc: vertical location of root | |||||
xcenter: horizontal location of root | |||||
''' | |||||
if not nx.is_tree(G): | |||||
raise TypeError('cannot use hierarchy_pos on a graph that is not a tree') | |||||
if root is None: | |||||
if isinstance(G, nx.DiGraph): | |||||
root = next(iter(nx.topological_sort(G))) #allows back compatibility with nx version 1.11 | |||||
else: | |||||
root = random.choice(list(G.nodes)) | |||||
def _hierarchy_pos(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5, pos = None, parent = None): | |||||
''' | |||||
see hierarchy_pos docstring for most arguments | |||||
pos: a dict saying where all nodes go if they have been assigned | |||||
parent: parent of this branch. - only affects it if non-directed | |||||
''' | |||||
if pos is None: | |||||
pos = {root:(xcenter,vert_loc)} | |||||
else: | |||||
pos[root] = (xcenter, vert_loc) | |||||
children = list(G.neighbors(root)) | |||||
if not isinstance(G, nx.DiGraph) and parent is not None: | |||||
children.remove(parent) | |||||
if len(children)!=0: | |||||
dx = width/len(children) | |||||
nextx = xcenter - width/2 - dx/2 | |||||
for child in children: | |||||
nextx += dx | |||||
pos = _hierarchy_pos(G,child, width = dx, vert_gap = vert_gap, | |||||
vert_loc = vert_loc-vert_gap, xcenter=nextx, | |||||
pos=pos, parent = root) | |||||
return pos | |||||
return _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter) | |||||
if __name__ == '__main__': | |||||
v_obj = (6, 4, 10, 3, 3, 2) | |||||
# v_obj = (6, 5, 10, 3, 3, 2) | |||||
tree_obj, v_obj = GIPF_tree(v_obj) | |||||
print('One closest vector is', v_obj) | |||||
# plot | |||||
pos = hierarchy_pos(tree_obj, 0) | |||||
node_labels = nx.get_node_attributes(tree_obj, 'node_label') | |||||
nx.draw(tree_obj, pos=pos, labels=node_labels, with_labels=True) |
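From the four transfer rules in getDynamicTable above, the 6-component vectors appear to encode, for the binary alphabet [0, 1], the tuple (#nodes labelled 0, #nodes labelled 1, #00, #01, #10, #11), where the last four components count ordered label pairs over the edges, so each undirected edge contributes twice. This reading is inferred from the code rather than stated in it; the hypothetical helper below (not part of the patch) computes such a vector for a labelled networkx graph and checks the reading on a toy example.

    import networkx as nx

    def path_frequency_vector(g, alphabet=(0, 1)):
        # (#nodes labelled alphabet[0], #nodes labelled alphabet[1],
        #  #00, #01, #10, #11) with ordered label pairs counted over the edges.
        v = [0] * 6
        for _, attr in g.nodes(data=True):
            v[alphabet.index(attr['node_label'])] += 1
        for u, w in g.edges():
            a = alphabet.index(g.nodes[u]['node_label'])
            b = alphabet.index(g.nodes[w]['node_label'])
            v[2 + 2 * a + b] += 1
            v[2 + 2 * b + a] += 1
        return tuple(v)

    # The 3-node path 0-0-1 gives (2, 1, 2, 1, 1, 0): starting from the single-node
    # vector (1, 0, 0, 0, 0, 0), transfer 1 (a 0-node attached to a 0-node) adds
    # (1, 0, 2, 0, 0, 0) and transfer 3 (a 1-node attached to a 0-node) adds
    # (0, 1, 0, 1, 1, 0).
    g = nx.Graph()
    g.add_nodes_from([(0, {'node_label': 0}), (1, {'node_label': 0}), (2, {'node_label': 1})])
    g.add_edges_from([(0, 1), (1, 2)])
    print(path_frequency_vector(g))  # (2, 1, 2, 1, 1, 0)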
@@ -26,7 +26,8 @@ def commonwalkkernel(*args, | |||||
n=None, | n=None, | ||||
weight=1, | weight=1, | ||||
compute_method=None, | compute_method=None, | ||||
n_jobs=None): | |||||
n_jobs=None, | |||||
verbose=True): | |||||
"""Calculate common walk graph kernels between graphs. | """Calculate common walk graph kernels between graphs. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -71,8 +72,9 @@ def commonwalkkernel(*args, | |||||
idx = [G[0] for G in Gn] | idx = [G[0] for G in Gn] | ||||
Gn = [G[1] for G in Gn] | Gn = [G[1] for G in Gn] | ||||
if len(Gn) != len_gn: | if len(Gn) != len_gn: | ||||
print('\n %d graphs are removed as they have only 1 node.\n' % | |||||
(len_gn - len(Gn))) | |||||
if verbose: | |||||
print('\n %d graphs are removed as they have only 1 node.\n' % | |||||
(len_gn - len(Gn))) | |||||
ds_attrs = get_dataset_attributes( | ds_attrs = get_dataset_attributes( | ||||
Gn, | Gn, | ||||
@@ -102,7 +104,7 @@ def commonwalkkernel(*args, | |||||
elif compute_method == 'geo': | elif compute_method == 'geo': | ||||
do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight) | do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(Gn,), n_jobs=n_jobs) | |||||
glbv=(Gn,), n_jobs=n_jobs, verbose=verbose) | |||||
# pool = Pool(n_jobs) | # pool = Pool(n_jobs) | ||||
@@ -167,9 +169,9 @@ def commonwalkkernel(*args, | |||||
# Kmatrix[j][i] = Kmatrix[i][j] | # Kmatrix[j][i] = Kmatrix[i][j] | ||||
run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
print( | |||||
"\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
if verbose: | |||||
print("\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
return Kmatrix, run_time, idx | return Kmatrix, run_time, idx | ||||
@@ -32,7 +32,8 @@ def randomwalkkernel(*args, | |||||
edge_label='bond_type', | edge_label='bond_type', | ||||
# params for spectral method. | # params for spectral method. | ||||
sub_kernel=None, | sub_kernel=None, | ||||
n_jobs=None): | |||||
n_jobs=None, | |||||
verbose=True): | |||||
"""Calculate random walk graph kernels. | """Calculate random walk graph kernels. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -60,7 +61,8 @@ def randomwalkkernel(*args, | |||||
eweight = None | eweight = None | ||||
if edge_weight == None: | if edge_weight == None: | ||||
print('\n None edge weight specified. Set all weight to 1.\n') | |||||
if verbose: | |||||
print('\n None edge weight specified. Set all weight to 1.\n') | |||||
else: | else: | ||||
try: | try: | ||||
some_weight = list( | some_weight = list( | ||||
@@ -68,13 +70,13 @@ def randomwalkkernel(*args, | |||||
if isinstance(some_weight, float) or isinstance(some_weight, int): | if isinstance(some_weight, float) or isinstance(some_weight, int): | ||||
eweight = edge_weight | eweight = edge_weight | ||||
else: | else: | ||||
print( | |||||
'\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
if verbose: | |||||
print('\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
except: | except: | ||||
print( | |||||
'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
if verbose: | |||||
print('\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
ds_attrs = get_dataset_attributes( | ds_attrs = get_dataset_attributes( | ||||
Gn, | Gn, | ||||
@@ -90,8 +92,9 @@ def randomwalkkernel(*args, | |||||
idx = [G[0] for G in Gn] | idx = [G[0] for G in Gn] | ||||
Gn = [G[1] for G in Gn] | Gn = [G[1] for G in Gn] | ||||
if len(Gn) != len_gn: | if len(Gn) != len_gn: | ||||
print('\n %d graphs are removed as they don\'t contain edges.\n' % | |||||
(len_gn - len(Gn))) | |||||
if verbose: | |||||
print('\n %d graphs are removed as they don\'t contain edges.\n' % | |||||
(len_gn - len(Gn))) | |||||
start_time = time.time() | start_time = time.time() | ||||
@@ -100,26 +103,30 @@ def randomwalkkernel(*args, | |||||
# gmf = filterGramMatrix(A_wave_list[0], label_list[0], ('C', '0', 'O'), ds_attrs['is_directed']) | # gmf = filterGramMatrix(A_wave_list[0], label_list[0], ('C', '0', 'O'), ds_attrs['is_directed']) | ||||
if compute_method == 'sylvester': | if compute_method == 'sylvester': | ||||
import warnings | |||||
warnings.warn('All labels are ignored.') | |||||
Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs) | |||||
if verbose: | |||||
import warnings | |||||
warnings.warn('All labels are ignored.') | |||||
Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs, verbose=verbose) | |||||
elif compute_method == 'conjugate': | elif compute_method == 'conjugate': | ||||
Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs, | |||||
node_kernels, edge_kernels, | |||||
node_label, edge_label, eweight, n_jobs) | |||||
Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs, node_kernels, | |||||
edge_kernels, node_label, edge_label, | |||||
eweight, n_jobs, verbose=verbose) | |||||
elif compute_method == 'fp': | elif compute_method == 'fp': | ||||
Kmatrix = _fixed_point(Gn, weight, p, q, ds_attrs, node_kernels, | Kmatrix = _fixed_point(Gn, weight, p, q, ds_attrs, node_kernels, | ||||
edge_kernels, node_label, edge_label, | |||||
eweight, n_jobs) | |||||
edge_kernels, node_label, edge_label, | |||||
eweight, n_jobs, verbose=verbose) | |||||
elif compute_method == 'spectral': | elif compute_method == 'spectral': | ||||
import warnings | |||||
warnings.warn('All labels are ignored. Only works for undirected graphs.') | |||||
Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs) | |||||
if verbose: | |||||
import warnings | |||||
warnings.warn('All labels are ignored. Only works for undirected graphs.') | |||||
Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel, | |||||
eweight, n_jobs, verbose=verbose) | |||||
elif compute_method == 'kron': | elif compute_method == 'kron': | ||||
pass | |||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j], | Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j], | ||||
@@ -131,15 +138,15 @@ def randomwalkkernel(*args, | |||||
) | ) | ||||
run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
print( | |||||
"\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
if verbose: | |||||
print("\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
return Kmatrix, run_time, idx | return Kmatrix, run_time, idx | ||||
############################################################################### | ############################################################################### | ||||
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs): | |||||
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, verbose=True): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method. | """Calculate walk graph kernels up to n between 2 graphs using Sylvester method. | ||||
Parameters | Parameters | ||||
@@ -162,8 +169,9 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs): | |||||
# don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
# A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
A_wave_list = [ | A_wave_list = [ | ||||
nx.adjacency_matrix(G, eweight).todense().transpose() for G in tqdm( | |||||
Gn, desc='compute adjacency matrices', file=sys.stdout) | |||||
nx.adjacency_matrix(G, eweight).todense().transpose() for G in | |||||
(tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) if | |||||
verbose else Gn) | |||||
] | ] | ||||
# # normalized adjacency matrices | # # normalized adjacency matrices | ||||
# A_wave_list = [] | # A_wave_list = [] | ||||
@@ -178,7 +186,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs): | |||||
G_Awl = Awl_toshare | G_Awl = Awl_toshare | ||||
do_partial = partial(wrapper_se_do, lmda) | do_partial = partial(wrapper_se_do, lmda) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(A_wave_list,), n_jobs=n_jobs) | |||||
glbv=(A_wave_list,), n_jobs=n_jobs, verbose=verbose) | |||||
# pbar = tqdm( | # pbar = tqdm( | ||||
# total=(1 + len(Gn)) * len(Gn) / 2, | # total=(1 + len(Gn)) * len(Gn) / 2, | ||||
@@ -226,7 +234,7 @@ def _se_do(A_wave1, A_wave2, lmda): | |||||
############################################################################### | ############################################################################### | ||||
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | ||||
node_label, edge_label, eweight, n_jobs): | |||||
node_label, edge_label, eweight, n_jobs, verbose=True): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method. | """Calculate walk graph kernels up to n between 2 graphs using conjugate method. | ||||
Parameters | Parameters | ||||
@@ -265,8 +273,8 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||||
# else: | # else: | ||||
# reindex nodes using consecutive integers for convenience of kernel calculation. | # reindex nodes using consecutive integers for convenience of kernel calculation. | ||||
Gn = [nx.convert_node_labels_to_integers( | Gn = [nx.convert_node_labels_to_integers( | ||||
g, first_label=0, label_attribute='label_orignal') for g in tqdm( | |||||
Gn, desc='reindex vertices', file=sys.stdout)] | |||||
g, first_label=0, label_attribute='label_orignal') for g in (tqdm( | |||||
Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] | |||||
if p == None and q == None: # p and q are uniform distributions as default. | if p == None and q == None: # p and q are uniform distributions as default. | ||||
def init_worker(gn_toshare): | def init_worker(gn_toshare): | ||||
@@ -275,7 +283,7 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||||
do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels, | do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels, | ||||
node_label, edge_kernels, edge_label, lmda) | node_label, edge_kernels, edge_label, lmda) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(Gn,), n_jobs=n_jobs) | |||||
glbv=(Gn,), n_jobs=n_jobs, verbose=verbose) | |||||
# pbar = tqdm( | # pbar = tqdm( | ||||
# total=(1 + len(Gn)) * len(Gn) / 2, | # total=(1 + len(Gn)) * len(Gn) / 2, | ||||
@@ -341,7 +349,7 @@ def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||||
############################################################################### | ############################################################################### | ||||
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | ||||
node_label, edge_label, eweight, n_jobs): | |||||
node_label, edge_label, eweight, n_jobs, verbose=True): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method. | """Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method. | ||||
Parameters | Parameters | ||||
@@ -393,8 +401,8 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||||
# else: | # else: | ||||
# reindex nodes using consecutive integers for convenience of kernel calculation. | # reindex nodes using consecutive integers for convenience of kernel calculation. | ||||
Gn = [nx.convert_node_labels_to_integers( | Gn = [nx.convert_node_labels_to_integers( | ||||
g, first_label=0, label_attribute='label_orignal') for g in tqdm( | |||||
Gn, desc='reindex vertices', file=sys.stdout)] | |||||
g, first_label=0, label_attribute='label_orignal') for g in (tqdm( | |||||
Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] | |||||
if p == None and q == None: # p and q are uniform distributions as default. | if p == None and q == None: # p and q are uniform distributions as default. | ||||
def init_worker(gn_toshare): | def init_worker(gn_toshare): | ||||
@@ -403,7 +411,7 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||||
do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels, | do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels, | ||||
node_label, edge_kernels, edge_label, lmda) | node_label, edge_kernels, edge_label, lmda) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(Gn,), n_jobs=n_jobs) | |||||
glbv=(Gn,), n_jobs=n_jobs, verbose=verbose) | |||||
return Kmatrix | return Kmatrix | ||||
@@ -445,7 +453,7 @@ def func_fp(x, p_times, lmda, w_times): | |||||
############################################################################### | ############################################################################### | ||||
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs): | |||||
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, verbose=True): | |||||
"""Calculate walk graph kernels up to n between 2 unlabeled graphs using | """Calculate walk graph kernels up to n between 2 unlabeled graphs using | ||||
spectral decomposition method. Labels will be ignored. | spectral decomposition method. Labels will be ignored. | ||||
@@ -469,7 +477,8 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs): | |||||
# precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
P_list = [] | P_list = [] | ||||
D_list = [] | D_list = [] | ||||
for G in tqdm(Gn, desc='spectral decompose', file=sys.stdout): | |||||
for G in (tqdm(Gn, desc='spectral decompose', file=sys.stdout) if | |||||
verbose else Gn): | |||||
# don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
# A is actually the transpose of the adjacency matrix. | # A is actually the transpose of the adjacency matrix. | ||||
A = nx.adjacency_matrix(G, eweight).todense().transpose() | A = nx.adjacency_matrix(G, eweight).todense().transpose() | ||||
@@ -488,7 +497,8 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs): | |||||
G_D = D_toshare | G_D = D_toshare | ||||
do_partial = partial(wrapper_sd_do, weight, sub_kernel) | do_partial = partial(wrapper_sd_do, weight, sub_kernel) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs) | |||||
glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs, | |||||
verbose=verbose) | |||||
# pbar = tqdm( | # pbar = tqdm( | ||||
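For orientation, a minimal usage sketch of the random walk kernel signature changed above. The import path is an assumption, the list-of-graphs calling convention is assumed to match the other kernels in this patch, and the toy graphs stand in for a real dataset; only keyword names visible in the hunks are relied on.

    import multiprocessing
    import networkx as nx
    from pygraph.kernels.randomWalkKernel import randomwalkkernel  # assumed import path

    # Toy graphs with at least one edge each; edge-less graphs would be removed,
    # as the hunk above notes.
    g1 = nx.cycle_graph(4)
    g2 = nx.path_graph(5)

    # With verbose=False the progress bars, the 'All labels are ignored.' warning
    # and the timing print now guarded above are all suppressed.
    Kmatrix, run_time, idx = randomwalkkernel([g1, g2],
                                              compute_method='sylvester',
                                              weight=0.1,
                                              n_jobs=multiprocessing.cpu_count(),
                                              verbose=False)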
@@ -56,7 +56,8 @@ def spkernel(*args, | |||||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] | Gn = args[0] if len(args) == 1 else [args[0], args[1]] | ||||
weight = None | weight = None | ||||
if edge_weight is None: | if edge_weight is None: | ||||
print('\n None edge weight specified. Set all weight to 1.\n') | |||||
if verbose: | |||||
print('\n None edge weight specified. Set all weight to 1.\n') | |||||
else: | else: | ||||
try: | try: | ||||
some_weight = list( | some_weight = list( | ||||
@@ -64,13 +65,15 @@ def spkernel(*args, | |||||
if isinstance(some_weight, (float, int)): | if isinstance(some_weight, (float, int)): | ||||
weight = edge_weight | weight = edge_weight | ||||
else: | else: | ||||
print( | |||||
'\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
if verbose: | |||||
print( | |||||
'\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
except: | except: | ||||
print( | |||||
'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
if verbose: | |||||
print( | |||||
'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
ds_attrs = get_dataset_attributes( | ds_attrs = get_dataset_attributes( | ||||
Gn, | Gn, | ||||
attr_names=['node_labeled', 'node_attr_dim', 'is_directed'], | attr_names=['node_labeled', 'node_attr_dim', 'is_directed'], | ||||
@@ -83,8 +86,9 @@ def spkernel(*args, | |||||
idx = [G[0] for G in Gn] | idx = [G[0] for G in Gn] | ||||
Gn = [G[1] for G in Gn] | Gn = [G[1] for G in Gn] | ||||
if len(Gn) != len_gn: | if len(Gn) != len_gn: | ||||
print('\n %d graphs are removed as they don\'t contain edges.\n' % | |||||
(len_gn - len(Gn))) | |||||
if verbose: | |||||
print('\n %d graphs are removed as they don\'t contain edges.\n' % | |||||
(len_gn - len(Gn))) | |||||
start_time = time.time() | start_time = time.time() | ||||
@@ -100,9 +104,12 @@ def spkernel(*args, | |||||
chunksize = int(len(Gn) / n_jobs) + 1 | chunksize = int(len(Gn) / n_jobs) + 1 | ||||
else: | else: | ||||
chunksize = 100 | chunksize = 100 | ||||
for i, g in tqdm( | |||||
pool.imap_unordered(getsp_partial, itr, chunksize), | |||||
desc='getting sp graphs', file=sys.stdout): | |||||
if verbose: | |||||
iterator = tqdm(pool.imap_unordered(getsp_partial, itr, chunksize), | |||||
desc='getting sp graphs', file=sys.stdout) | |||||
else: | |||||
iterator = pool.imap_unordered(getsp_partial, itr, chunksize) | |||||
for i, g in iterator: | |||||
Gn[i] = g | Gn[i] = g | ||||
pool.close() | pool.close() | ||||
pool.join() | pool.join() | ||||
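The same if/else-around-tqdm construction used here recurs in the structural shortest path and path kernel hunks below. A hypothetical helper, shown only as a refactoring sketch and not part of this patch, would express the pattern once:

    import sys
    from tqdm import tqdm

    def maybe_tqdm(iterable, verbose, **kwargs):
        # Wrap the iterable in a tqdm progress bar only when verbose is True;
        # otherwise return it unchanged so nothing is printed.
        return tqdm(iterable, file=sys.stdout, **kwargs) if verbose else iterable

With such a helper the loop above could read "for i, g in maybe_tqdm(pool.imap_unordered(getsp_partial, itr, chunksize), verbose, desc='getting sp graphs')", keeping the loop body identical in both modes.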
@@ -186,9 +193,10 @@ def spkernel(*args, | |||||
# Kmatrix[j][i] = kernel | # Kmatrix[j][i] = kernel | ||||
run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
print( | |||||
"\n --- shortest path kernel matrix of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
if verbose: | |||||
print( | |||||
"\n --- shortest path kernel matrix of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
return Kmatrix, run_time, idx | return Kmatrix, run_time, idx | ||||
@@ -32,7 +32,8 @@ def structuralspkernel(*args, | |||||
node_kernels=None, | node_kernels=None, | ||||
edge_kernels=None, | edge_kernels=None, | ||||
compute_method='naive', | compute_method='naive', | ||||
n_jobs=None): | |||||
n_jobs=None, | |||||
verbose=True): | |||||
"""Calculate mean average structural shortest path kernels between graphs. | """Calculate mean average structural shortest path kernels between graphs. | ||||
Parameters | Parameters | ||||
@@ -75,7 +76,8 @@ def structuralspkernel(*args, | |||||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] | Gn = args[0] if len(args) == 1 else [args[0], args[1]] | ||||
weight = None | weight = None | ||||
if edge_weight is None: | if edge_weight is None: | ||||
print('\n None edge weight specified. Set all weight to 1.\n') | |||||
if verbose: | |||||
print('\n None edge weight specified. Set all weight to 1.\n') | |||||
else: | else: | ||||
try: | try: | ||||
some_weight = list( | some_weight = list( | ||||
@@ -83,13 +85,15 @@ def structuralspkernel(*args, | |||||
if isinstance(some_weight, (float, int)): | if isinstance(some_weight, (float, int)): | ||||
weight = edge_weight | weight = edge_weight | ||||
else: | else: | ||||
print( | |||||
'\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
if verbose: | |||||
print( | |||||
'\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
except: | except: | ||||
print( | |||||
'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
if verbose: | |||||
print( | |||||
'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
ds_attrs = get_dataset_attributes( | ds_attrs = get_dataset_attributes( | ||||
Gn, | Gn, | ||||
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', | attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', | ||||
@@ -110,11 +114,13 @@ def structuralspkernel(*args, | |||||
if compute_method == 'trie': | if compute_method == 'trie': | ||||
getsp_partial = partial(wrapper_getSP_trie, weight, ds_attrs['is_directed']) | getsp_partial = partial(wrapper_getSP_trie, weight, ds_attrs['is_directed']) | ||||
else: | else: | ||||
getsp_partial = partial(wrapper_getSP_naive, weight, ds_attrs['is_directed']) | |||||
for i, sp in tqdm( | |||||
pool.imap_unordered(getsp_partial, itr, chunksize), | |||||
desc='getting shortest paths', | |||||
file=sys.stdout): | |||||
getsp_partial = partial(wrapper_getSP_naive, weight, ds_attrs['is_directed']) | |||||
if verbose: | |||||
iterator = tqdm(pool.imap_unordered(getsp_partial, itr, chunksize), | |||||
desc='getting shortest paths', file=sys.stdout) | |||||
else: | |||||
iterator = pool.imap_unordered(getsp_partial, itr, chunksize) | |||||
for i, sp in iterator: | |||||
splist[i] = sp | splist[i] = sp | ||||
# time.sleep(10) | # time.sleep(10) | ||||
pool.close() | pool.close() | ||||
@@ -169,12 +175,12 @@ def structuralspkernel(*args, | |||||
do_partial = partial(wrapper_ssp_do_trie, ds_attrs, node_label, edge_label, | do_partial = partial(wrapper_ssp_do_trie, ds_attrs, node_label, edge_label, | ||||
node_kernels, edge_kernels) | node_kernels, edge_kernels) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(splist, Gn), n_jobs=n_jobs) | |||||
glbv=(splist, Gn), n_jobs=n_jobs, verbose=verbose) | |||||
else: | else: | ||||
do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | ||||
node_kernels, edge_kernels) | node_kernels, edge_kernels) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(splist, Gn), n_jobs=n_jobs) | |||||
glbv=(splist, Gn), n_jobs=n_jobs, verbose=verbose) | |||||
# # ---- use pool.map to parallel. ---- | # # ---- use pool.map to parallel. ---- | ||||
# pool = Pool(n_jobs) | # pool = Pool(n_jobs) | ||||
@@ -233,9 +239,9 @@ def structuralspkernel(*args, | |||||
# Kmatrix[j][i] = kernel | # Kmatrix[j][i] = kernel | ||||
run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
print( | |||||
"\n --- shortest path kernel matrix of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
if verbose: | |||||
print("\n --- shortest path kernel matrix of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
return Kmatrix, run_time | return Kmatrix, run_time | ||||
@@ -28,7 +28,8 @@ def untilhpathkernel(*args, | |||||
depth=10, | depth=10, | ||||
k_func='tanimoto', | k_func='tanimoto', | ||||
compute_method='trie', | compute_method='trie', | ||||
n_jobs=None): | |||||
n_jobs=None, | |||||
verbose=True): | |||||
"""Calculate path graph kernels up to depth/hight h between graphs. | """Calculate path graph kernels up to depth/hight h between graphs. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -88,9 +89,12 @@ def untilhpathkernel(*args, | |||||
else: | else: | ||||
getps_partial = partial(wrapper_find_all_paths_until_length, depth, | getps_partial = partial(wrapper_find_all_paths_until_length, depth, | ||||
ds_attrs, node_label, edge_label) | ds_attrs, node_label, edge_label) | ||||
for i, ps in tqdm( | |||||
pool.imap_unordered(getps_partial, itr, chunksize), | |||||
desc='getting paths', file=sys.stdout): | |||||
if verbose: | |||||
iterator = tqdm(pool.imap_unordered(getps_partial, itr, chunksize), | |||||
desc='getting paths', file=sys.stdout) | |||||
else: | |||||
iterator = pool.imap_unordered(getps_partial, itr, chunksize) | |||||
for i, ps in iterator: | |||||
all_paths[i] = ps | all_paths[i] = ps | ||||
pool.close() | pool.close() | ||||
pool.join() | pool.join() | ||||
@@ -122,14 +126,14 @@ def untilhpathkernel(*args, | |||||
G_trie = trie_toshare | G_trie = trie_toshare | ||||
do_partial = partial(wrapper_uhpath_do_trie, k_func) | do_partial = partial(wrapper_uhpath_do_trie, k_func) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(all_paths,), n_jobs=n_jobs) | |||||
glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) | |||||
else: | else: | ||||
def init_worker(plist_toshare): | def init_worker(plist_toshare): | ||||
global G_plist | global G_plist | ||||
G_plist = plist_toshare | G_plist = plist_toshare | ||||
do_partial = partial(wrapper_uhpath_do_naive, k_func) | do_partial = partial(wrapper_uhpath_do_naive, k_func) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(all_paths,), n_jobs=n_jobs) | |||||
glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) | |||||
# # ---- direct running, normally use single CPU core. ---- | # # ---- direct running, normally use single CPU core. ---- | ||||
@@ -167,9 +171,9 @@ def untilhpathkernel(*args, | |||||
# pbar.update(1) | # pbar.update(1) | ||||
run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
print( | |||||
"\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" | |||||
% (depth, len(Gn), run_time)) | |||||
if verbose: | |||||
print("\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" | |||||
% (depth, len(Gn), run_time)) | |||||
# print(Kmatrix[0][0:10]) | # print(Kmatrix[0][0:10]) | ||||
return Kmatrix, run_time | return Kmatrix, run_time | ||||
@@ -32,7 +32,8 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
extra_params=None, | extra_params=None, | ||||
ds_name='ds-unknown', | ds_name='ds-unknown', | ||||
n_jobs=1, | n_jobs=1, | ||||
read_gm_from_file=False): | |||||
read_gm_from_file=False, | |||||
verbose=True): | |||||
"""Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results. | """Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results. | ||||
Parameters | Parameters | ||||
@@ -84,15 +85,17 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
raise Exception( | raise Exception( | ||||
'The model type is incorrect! Please choose from regression or classification.' | 'The model type is incorrect! Please choose from regression or classification.' | ||||
) | ) | ||||
print() | |||||
print('--- This is a %s problem ---' % model_type) | |||||
if verbose: | |||||
print() | |||||
print('--- This is a %s problem ---' % model_type) | |||||
str_fw += 'This is a %s problem.\n' % model_type | str_fw += 'This is a %s problem.\n' % model_type | ||||
# calculate gram matrices rather than read them from file. | # calculate gram matrices rather than read them from file. | ||||
if read_gm_from_file == False: | if read_gm_from_file == False: | ||||
# Load the dataset | # Load the dataset | ||||
print() | |||||
print('\n1. Loading dataset from file...') | |||||
if verbose: | |||||
print() | |||||
print('\n1. Loading dataset from file...') | |||||
if isinstance(datafile, str): | if isinstance(datafile, str): | ||||
dataset, y_all = loadDataset( | dataset, y_all = loadDataset( | ||||
datafile, filename_y=datafile_y, extra_params=extra_params) | datafile, filename_y=datafile_y, extra_params=extra_params) | ||||
@@ -117,14 +120,16 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
] # list to store param grids precomputed ignoring the useless ones | ] # list to store param grids precomputed ignoring the useless ones | ||||
# calculate all gram matrices | # calculate all gram matrices | ||||
print() | |||||
print('2. Calculating gram matrices. This could take a while...') | |||||
if verbose: | |||||
print() | |||||
print('2. Calculating gram matrices. This could take a while...') | |||||
str_fw += '\nII. Gram matrices.\n\n' | str_fw += '\nII. Gram matrices.\n\n' | ||||
tts = time.time() # start training time | tts = time.time() # start training time | ||||
nb_gm_ignore = 0 # the number of gram matrices that should not be considered, as they may contain elements that are not numbers (NaN) | nb_gm_ignore = 0 # the number of gram matrices that should not be considered, as they may contain elements that are not numbers (NaN) | ||||
for idx, params_out in enumerate(param_list_precomputed): | for idx, params_out in enumerate(param_list_precomputed): | ||||
y = y_all[:] | y = y_all[:] | ||||
params_out['n_jobs'] = n_jobs | params_out['n_jobs'] = n_jobs | ||||
params_out['verbose'] = verbose | |||||
# print(dataset) | # print(dataset) | ||||
# import networkx as nx | # import networkx as nx | ||||
# nx.draw_networkx(dataset[1]) | # nx.draw_networkx(dataset[1]) | ||||
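
Writing n_jobs and verbose into params_out means every hyper-parameter combination now carries the shared settings, presumably because each combination is later unpacked into the kernel call with ** (the dispatch itself sits outside this hunk). A hedged sketch of that mechanism, with a dummy estimator in place of the real graph-kernel function:

    from sklearn.model_selection import ParameterGrid

    def estimator(data, weight=1.0, n_jobs=1, verbose=True):
        """Dummy kernel function; the real one returns (Kmatrix, run_time)."""
        if verbose:
            print('weight=%s on %d jobs' % (weight, n_jobs))
        return [[v * weight for v in row] for row in data], 0.0

    param_grid_precomputed = {'weight': [0.01, 0.1]}
    data = [[1, 2], [3, 4]]
    for params_out in ParameterGrid(param_grid_precomputed):
        params_out['n_jobs'] = 4        # shared settings piggyback on each grid entry
        params_out['verbose'] = False   # one switch silences every kernel computation
        Kmatrix, run_time = estimator(data, **params_out)
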
@@ -154,23 +159,27 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
for j in range(i, len(Kmatrix)): | for j in range(i, len(Kmatrix)): | ||||
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | ||||
Kmatrix[j][i] = Kmatrix[i][j] | Kmatrix[j][i] = Kmatrix[i][j] | ||||
print() | |||||
if verbose: | |||||
print() | |||||
if params_out == {}: | if params_out == {}: | ||||
print('the gram matrix is: ') | |||||
if verbose: | |||||
print('the gram matrix is: ') | |||||
str_fw += 'the gram matrix is:\n\n' | str_fw += 'the gram matrix is:\n\n' | ||||
else: | else: | ||||
print('the gram matrix with parameters', params_out, 'is: \n\n') | |||||
if verbose: | |||||
print('the gram matrix with parameters', params_out, 'is: \n\n') | |||||
str_fw += 'the gram matrix with parameters %s is:\n\n' % params_out | str_fw += 'the gram matrix with parameters %s is:\n\n' % params_out | ||||
if len(Kmatrix) < 2: | if len(Kmatrix) < 2: | ||||
nb_gm_ignore += 1 | nb_gm_ignore += 1 | ||||
print('ignored, as at most one of its diagonal values is non-zero.') | |||||
if verbose: | |||||
print('ignored, as at most one of its diagonal values is non-zero.') | |||||
str_fw += 'ignored, as at most one of its diagonal values is non-zero.\n\n' | str_fw += 'ignored, as at most one of its diagonal values is non-zero.\n\n' | ||||
else: | else: | ||||
if np.isnan(Kmatrix).any( | if np.isnan(Kmatrix).any( | ||||
): # if the matrix contains elements that are not numbers | ): # if the matrix contains elements that are not numbers | ||||
nb_gm_ignore += 1 | nb_gm_ignore += 1 | ||||
print('ignored, as it contains elements that are not numbers.') | |||||
if verbose: | |||||
print('ignored, as it contains elements that are not numbers.') | |||||
str_fw += 'ignored, as it contains elements that are not numbers.\n\n' | str_fw += 'ignored, as it contains elements that are not numbers.\n\n' | ||||
else: | else: | ||||
# print(Kmatrix) | # print(Kmatrix) | ||||
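
The loop above is the usual cosine normalization of a Gram matrix, K[i][j] / sqrt(K[i][i] * K[j][j]), and the np.isnan check that follows it throws away any matrix that the kernel or the normalization has corrupted. A vectorized sketch of both steps on a plain NumPy array:

    import numpy as np

    def normalize_gram(K):
        """Cosine-normalize a Gram matrix so every diagonal entry becomes 1."""
        d = np.sqrt(np.diag(K))
        return K / np.outer(d, d)

    K = np.array([[4.0, 2.0],
                  [2.0, 9.0]])
    Kn = normalize_gram(K)
    if np.isnan(Kn).any():            # mirrors the screen in the selection loop
        print('ignored, as it contains elements that are not numbers.')
    else:
        print(Kn)                     # diagonal is now all ones, off-diagonal 1/3
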
@@ -193,10 +202,12 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
gram_matrix_time.append(current_run_time) | gram_matrix_time.append(current_run_time) | ||||
param_list_pre_revised.append(params_out) | param_list_pre_revised.append(params_out) | ||||
if nb_g_ignore > 0: | if nb_g_ignore > 0: | ||||
print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore) | |||||
if verbose: | |||||
print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore) | |||||
str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore | str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore | ||||
print() | |||||
print( | |||||
if verbose: | |||||
print() | |||||
print( | |||||
'{} gram matrices are calculated, {} of which are ignored.'.format( | '{} gram matrices are calculated, {} of which are ignored.'.format( | ||||
len(param_list_precomputed), nb_gm_ignore)) | len(param_list_precomputed), nb_gm_ignore)) | ||||
str_fw += '{} gram matrices are calculated, {} of which are ignored.\n\n'.format(len(param_list_precomputed), nb_gm_ignore) | str_fw += '{} gram matrices are calculated, {} of which are ignored.\n\n'.format(len(param_list_precomputed), nb_gm_ignore) | ||||
@@ -205,20 +216,22 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
'{}: {}\n'.format(idx, params_out) | '{}: {}\n'.format(idx, params_out) | ||||
for idx, params_out in enumerate(param_list_precomputed) | for idx, params_out in enumerate(param_list_precomputed) | ||||
]) | ]) | ||||
print() | |||||
if verbose: | |||||
print() | |||||
if len(gram_matrices) == 0: | if len(gram_matrices) == 0: | ||||
print('all gram matrices are ignored, no results obtained.') | |||||
if verbose: | |||||
print('all gram matrices are ignored, no results obtained.') | |||||
str_fw += '\nall gram matrices are ignored, no results obtained.\n\n' | str_fw += '\nall gram matrices are ignored, no results obtained.\n\n' | ||||
else: | else: | ||||
# save gram matrices to file. | # save gram matrices to file. | ||||
np.savez(results_dir + '/' + ds_name + '.gm', | np.savez(results_dir + '/' + ds_name + '.gm', | ||||
gms=gram_matrices, params=param_list_pre_revised, y=y, | gms=gram_matrices, params=param_list_pre_revised, y=y, | ||||
gmtime=gram_matrix_time) | gmtime=gram_matrix_time) | ||||
print( | |||||
if verbose: | |||||
print( | |||||
'3. Fitting and predicting using nested cross validation. This could really take a while...' | '3. Fitting and predicting using nested cross validation. This could really take a while...' | ||||
) | |||||
) | |||||
# ---- use pool.imap_unordered to parallel and track progress. ---- | # ---- use pool.imap_unordered to parallel and track progress. ---- | ||||
# train_pref = [] | # train_pref = [] | ||||
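
The np.savez call above is what makes the read_gm_from_file=True branch possible: the Gram matrices, the surviving parameter dicts, the targets and the timings all land in one archive, and because np.savez appends the extension, saving to '<ds_name>.gm' produces the '<ds_name>.gm.npz' file that the loading branch opens later. A small round-trip sketch with made-up data and an illustrative file name (on newer NumPy the object-typed params array needs allow_pickle=True when reading):

    import numpy as np

    gram_matrices = [np.eye(3), 2 * np.eye(3)]             # made-up results
    param_list_pre_revised = [{'weight': 0.01}, {'weight': 0.1}]
    y = [0, 1, 0]
    gram_matrix_time = [0.41, 0.52]

    np.savez('example_ds.gm', gms=gram_matrices, params=param_list_pre_revised,
             y=y, gmtime=gram_matrix_time)                  # writes example_ds.gm.npz

    gmfile = np.load('example_ds.gm.npz', allow_pickle=True)
    print(gmfile['gms'].shape, gmfile['gmtime'])            # (2, 3, 3) [0.41 0.52]
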
@@ -252,7 +265,12 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
# else: | # else: | ||||
# chunksize = 1000 | # chunksize = 1000 | ||||
chunksize = 1 | chunksize = 1 | ||||
for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout): | |||||
if verbose: | |||||
iterator = tqdm(pool.imap_unordered(trial_do_partial, | |||||
range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout) | |||||
else: | |||||
iterator = pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize) | |||||
for o1, o2, o3 in iterator: | |||||
train_pref.append(o1) | train_pref.append(o1) | ||||
val_pref.append(o2) | val_pref.append(o2) | ||||
test_pref.append(o3) | test_pref.append(o3) | ||||
@@ -278,8 +296,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
# test_pref.append(o3) | # test_pref.append(o3) | ||||
# print() | # print() | ||||
print() | |||||
print('4. Getting final performance...') | |||||
if verbose: | |||||
print() | |||||
print('4. Getting final performance...') | |||||
str_fw += '\nIII. Performance.\n\n' | str_fw += '\nIII. Performance.\n\n' | ||||
# averages and confidences of performances on outer trials for each combination of parameters | # averages and confidences of performances on outer trials for each combination of parameters | ||||
average_train_scores = np.mean(train_pref, axis=0) | average_train_scores = np.mean(train_pref, axis=0) | ||||
@@ -311,11 +330,12 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
param_list_pre_revised[i] for i in best_params_index[0] | param_list_pre_revised[i] for i in best_params_index[0] | ||||
] | ] | ||||
best_params_in = [param_list[i] for i in best_params_index[1]] | best_params_in = [param_list[i] for i in best_params_index[1]] | ||||
print('best_params_out: ', best_params_out) | |||||
print('best_params_in: ', best_params_in) | |||||
print() | |||||
print('best_val_perf: ', best_val_perf) | |||||
print('best_val_std: ', min_val_std) | |||||
if verbose: | |||||
print('best_params_out: ', best_params_out) | |||||
print('best_params_in: ', best_params_in) | |||||
print() | |||||
print('best_val_perf: ', best_val_perf) | |||||
print('best_val_std: ', min_val_std) | |||||
str_fw += 'best settings of hyper-params to build gram matrix: %s\n' % best_params_out | str_fw += 'best settings of hyper-params to build gram matrix: %s\n' % best_params_out | ||||
str_fw += 'best settings of other hyper-params: %s\n\n' % best_params_in | str_fw += 'best settings of other hyper-params: %s\n\n' % best_params_in | ||||
str_fw += 'best_val_perf: %s\n' % best_val_perf | str_fw += 'best_val_perf: %s\n' % best_val_perf | ||||
@@ -332,8 +352,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
std_perf_scores[value][best_params_index[1][idx]] | std_perf_scores[value][best_params_index[1][idx]] | ||||
for idx, value in enumerate(best_params_index[0]) | for idx, value in enumerate(best_params_index[0]) | ||||
] | ] | ||||
print('final_performance: ', final_performance) | |||||
print('final_confidence: ', final_confidence) | |||||
if verbose: | |||||
print('final_performance: ', final_performance) | |||||
print('final_confidence: ', final_confidence) | |||||
str_fw += 'final_performance: %s\n' % final_performance | str_fw += 'final_performance: %s\n' % final_performance | ||||
str_fw += 'final_confidence: %s\n' % final_confidence | str_fw += 'final_confidence: %s\n' % final_confidence | ||||
train_performance = [ | train_performance = [ | ||||
@@ -344,28 +365,29 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
std_train_scores[value][best_params_index[1][idx]] | std_train_scores[value][best_params_index[1][idx]] | ||||
for idx, value in enumerate(best_params_index[0]) | for idx, value in enumerate(best_params_index[0]) | ||||
] | ] | ||||
print('train_performance: %s' % train_performance) | |||||
print('train_std: ', train_std) | |||||
if verbose: | |||||
print('train_performance: %s' % train_performance) | |||||
print('train_std: ', train_std) | |||||
str_fw += 'train_performance: %s\n' % train_performance | str_fw += 'train_performance: %s\n' % train_performance | ||||
str_fw += 'train_std: %s\n\n' % train_std | str_fw += 'train_std: %s\n\n' % train_std | ||||
print() | |||||
if verbose: | |||||
print() | |||||
tt_total = time.time() - tts # training time for all hyper-parameters | tt_total = time.time() - tts # training time for all hyper-parameters | ||||
average_gram_matrix_time = np.mean(gram_matrix_time) | average_gram_matrix_time = np.mean(gram_matrix_time) | ||||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) | |||||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) if len(gram_matrix_time) > 1 else 0 | |||||
best_gram_matrix_time = [ | best_gram_matrix_time = [ | ||||
gram_matrix_time[i] for i in best_params_index[0] | gram_matrix_time[i] for i in best_params_index[0] | ||||
] | ] | ||||
ave_bgmt = np.mean(best_gram_matrix_time) | ave_bgmt = np.mean(best_gram_matrix_time) | ||||
std_bgmt = np.std(best_gram_matrix_time, ddof=1) | |||||
print( | |||||
'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s' | |||||
.format(average_gram_matrix_time, std_gram_matrix_time)) | |||||
print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format( | |||||
ave_bgmt, std_bgmt)) | |||||
print( | |||||
'total training time with all hyper-param choices: {:.2f}s'.format( | |||||
tt_total)) | |||||
std_bgmt = np.std(best_gram_matrix_time, ddof=1) if len(best_gram_matrix_time) > 1 else 0 | |||||
if verbose: | |||||
print('time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s' | |||||
.format(average_gram_matrix_time, std_gram_matrix_time)) | |||||
print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format( | |||||
ave_bgmt, std_bgmt)) | |||||
print('total training time with all hyper-param choices: {:.2f}s'.format( | |||||
tt_total)) | |||||
str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\n'.format(average_gram_matrix_time, std_gram_matrix_time) | str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\n'.format(average_gram_matrix_time, std_gram_matrix_time) | ||||
str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt) | str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt) | ||||
str_fw += 'total training time with all hyper-param choices: {:.2f}s\n\n'.format(tt_total) | str_fw += 'total training time with all hyper-param choices: {:.2f}s\n\n'.format(tt_total) | ||||
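
The `... if len(...) > 1 else 0` guards added around np.std(..., ddof=1) are the substantive fix in this function: with only one Gram matrix (or one best parameter set) the sample standard deviation divides by n - 1 = 0, NumPy returns nan with a RuntimeWarning, and that nan then leaks into the '{:.2f}±{:.2f}s' report. A short reproduction of the failure and of the guarded version:

    import numpy as np

    gram_matrix_time = [0.42]        # only one hyper-parameter combination survived

    # unguarded: sample std of a single value divides by zero -> nan (+ RuntimeWarning)
    print(np.std(gram_matrix_time, ddof=1))

    # guarded, as in the patch: report 0 when there is nothing to spread over
    std_gmt = np.std(gram_matrix_time, ddof=1) if len(gram_matrix_time) > 1 else 0
    print('time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
          .format(np.mean(gram_matrix_time), std_gmt))      # 0.42±0.00s
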
@@ -437,7 +459,8 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
'params', 'train_perf', 'valid_perf', 'test_perf', | 'params', 'train_perf', 'valid_perf', 'test_perf', | ||||
'gram_matrix_time' | 'gram_matrix_time' | ||||
] | ] | ||||
print() | |||||
if verbose: | |||||
print() | |||||
tb_print = tabulate( | tb_print = tabulate( | ||||
OrderedDict( | OrderedDict( | ||||
sorted(table_dict.items(), | sorted(table_dict.items(), | ||||
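
The results table is produced by handing tabulate a dict of columns whose keys are reordered according to keyorder; assuming the truncated call passes headers='keys' (which tabulate supports for dict-of-columns input), the keys become the column headers. A toy reconstruction of that reporting step with invented scores:

    from collections import OrderedDict
    from tabulate import tabulate

    keyorder = ['params', 'train_perf', 'valid_perf', 'test_perf', 'gram_matrix_time']
    table_dict = {                      # invented values; the real ones come from the CV loop
        'train_perf': ['0.92±0.01'],
        'valid_perf': ['0.88±0.02'],
        'test_perf': ['0.86±0.03'],
        'params': ["{'weight': 0.1}"],
        'gram_matrix_time': ['0.42'],
    }
    tb_print = tabulate(
        OrderedDict(sorted(table_dict.items(), key=lambda i: keyorder.index(i[0]))),
        headers='keys')                 # headers='keys' is an assumption here
    print(tb_print)
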
@@ -453,8 +476,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
param_list = list(ParameterGrid(param_grid)) | param_list = list(ParameterGrid(param_grid)) | ||||
# read gram matrices from file. | # read gram matrices from file. | ||||
print() | |||||
print('2. Reading gram matrices from file...') | |||||
if verbose: | |||||
print() | |||||
print('2. Reading gram matrices from file...') | |||||
str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n' | str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n' | ||||
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | ||||
gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed | gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed | ||||
@@ -464,9 +488,10 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
tts = time.time() # start training time | tts = time.time() # start training time | ||||
# nb_gm_ignore = 0 # the number of gram matrices that should not be considered, as they may contain elements that are not numbers (NaN) | # nb_gm_ignore = 0 # the number of gram matrices that should not be considered, as they may contain elements that are not numbers (NaN) | ||||
print( | |||||
'3. Fitting and predicting using nested cross validation. This could really take a while...' | |||||
) | |||||
if verbose: | |||||
print( | |||||
'3. Fitting and predicting using nested cross validation. This could really take a while...' | |||||
) | |||||
# ---- use pool.imap_unordered to parallel and track progress. ---- | # ---- use pool.imap_unordered to parallel and track progress. ---- | ||||
def init_worker(gms_toshare): | def init_worker(gms_toshare): | ||||
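
init_worker together with the pool's initargs is the standard multiprocessing idiom for handing large read-only objects, here the Gram matrices, to every worker once via the pool initializer instead of pickling them into each task. A stripped-down sketch of the idiom with a made-up trial_do worker:

    import multiprocessing
    import numpy as np

    def init_worker(gms_toshare):
        global G_gms                         # stash shared data in a worker-process global
        G_gms = gms_toshare

    def trial_do(i):
        return i, float(np.trace(G_gms[0]))  # workers read the shared matrices

    if __name__ == '__main__':
        gram_matrices = [np.eye(4), 2 * np.eye(4)]
        with multiprocessing.Pool(processes=2, initializer=init_worker,
                                  initargs=(gram_matrices,)) as pool:
            for i, tr in pool.imap_unordered(trial_do, range(5)):
                print(i, tr)
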
@@ -479,7 +504,12 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
val_pref = [] | val_pref = [] | ||||
test_pref = [] | test_pref = [] | ||||
chunksize = 1 | chunksize = 1 | ||||
for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout): | |||||
if verbose: | |||||
iterator = tqdm(pool.imap_unordered(trial_do_partial, | |||||
range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout) | |||||
else: | |||||
iterator = pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize) | |||||
for o1, o2, o3 in iterator: | |||||
train_pref.append(o1) | train_pref.append(o1) | ||||
val_pref.append(o2) | val_pref.append(o2) | ||||
test_pref.append(o3) | test_pref.append(o3) | ||||
@@ -509,8 +539,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
# val_pref.append(o2) | # val_pref.append(o2) | ||||
# test_pref.append(o3) | # test_pref.append(o3) | ||||
print() | |||||
print('4. Getting final performance...') | |||||
if verbose: | |||||
print() | |||||
print('4. Getting final performance...') | |||||
str_fw += '\nIII. Performance.\n\n' | str_fw += '\nIII. Performance.\n\n' | ||||
# averages and confidences of performances on outer trials for each combination of parameters | # averages and confidences of performances on outer trials for each combination of parameters | ||||
average_train_scores = np.mean(train_pref, axis=0) | average_train_scores = np.mean(train_pref, axis=0) | ||||
@@ -537,11 +568,12 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
param_list_pre_revised[i] for i in best_params_index[0] | param_list_pre_revised[i] for i in best_params_index[0] | ||||
] | ] | ||||
best_params_in = [param_list[i] for i in best_params_index[1]] | best_params_in = [param_list[i] for i in best_params_index[1]] | ||||
print('best_params_out: ', best_params_out) | |||||
print('best_params_in: ', best_params_in) | |||||
print() | |||||
print('best_val_perf: ', best_val_perf) | |||||
print('best_val_std: ', min_val_std) | |||||
if verbose: | |||||
print('best_params_out: ', best_params_out) | |||||
print('best_params_in: ', best_params_in) | |||||
print() | |||||
print('best_val_perf: ', best_val_perf) | |||||
print('best_val_std: ', min_val_std) | |||||
str_fw += 'best settings of hyper-params to build gram matrix: %s\n' % best_params_out | str_fw += 'best settings of hyper-params to build gram matrix: %s\n' % best_params_out | ||||
str_fw += 'best settings of other hyper-params: %s\n\n' % best_params_in | str_fw += 'best settings of other hyper-params: %s\n\n' % best_params_in | ||||
str_fw += 'best_val_perf: %s\n' % best_val_perf | str_fw += 'best_val_perf: %s\n' % best_val_perf | ||||
@@ -555,8 +587,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
std_perf_scores[value][best_params_index[1][idx]] | std_perf_scores[value][best_params_index[1][idx]] | ||||
for idx, value in enumerate(best_params_index[0]) | for idx, value in enumerate(best_params_index[0]) | ||||
] | ] | ||||
print('final_performance: ', final_performance) | |||||
print('final_confidence: ', final_confidence) | |||||
if verbose: | |||||
print('final_performance: ', final_performance) | |||||
print('final_confidence: ', final_confidence) | |||||
str_fw += 'final_performance: %s\n' % final_performance | str_fw += 'final_performance: %s\n' % final_performance | ||||
str_fw += 'final_confidence: %s\n' % final_confidence | str_fw += 'final_confidence: %s\n' % final_confidence | ||||
train_performance = [ | train_performance = [ | ||||
@@ -567,30 +600,34 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
std_train_scores[value][best_params_index[1][idx]] | std_train_scores[value][best_params_index[1][idx]] | ||||
for idx, value in enumerate(best_params_index[0]) | for idx, value in enumerate(best_params_index[0]) | ||||
] | ] | ||||
print('train_performance: %s' % train_performance) | |||||
print('train_std: ', train_std) | |||||
if verbose: | |||||
print('train_performance: %s' % train_performance) | |||||
print('train_std: ', train_std) | |||||
str_fw += 'train_performance: %s\n' % train_performance | str_fw += 'train_performance: %s\n' % train_performance | ||||
str_fw += 'train_std: %s\n\n' % train_std | str_fw += 'train_std: %s\n\n' % train_std | ||||
print() | |||||
if verbose: | |||||
print() | |||||
average_gram_matrix_time = np.mean(gram_matrix_time) | average_gram_matrix_time = np.mean(gram_matrix_time) | ||||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) | |||||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) if len(gram_matrix_time) > 1 else 0 | |||||
best_gram_matrix_time = [ | best_gram_matrix_time = [ | ||||
gram_matrix_time[i] for i in best_params_index[0] | gram_matrix_time[i] for i in best_params_index[0] | ||||
] | ] | ||||
ave_bgmt = np.mean(best_gram_matrix_time) | ave_bgmt = np.mean(best_gram_matrix_time) | ||||
std_bgmt = np.std(best_gram_matrix_time, ddof=1) | |||||
print( | |||||
'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s' | |||||
.format(average_gram_matrix_time, std_gram_matrix_time)) | |||||
print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format( | |||||
ave_bgmt, std_bgmt)) | |||||
std_bgmt = np.std(best_gram_matrix_time, ddof=1) if len(best_gram_matrix_time) > 1 else 0 | |||||
if verbose: | |||||
print( | |||||
'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s' | |||||
.format(average_gram_matrix_time, std_gram_matrix_time)) | |||||
print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format( | |||||
ave_bgmt, std_bgmt)) | |||||
tt_poster = time.time() - tts # training time with hyper-param choices that did not participate in calculation of gram matrices | tt_poster = time.time() - tts # training time with hyper-param choices that did not participate in calculation of gram matrices | ||||
print( | |||||
'training time with hyper-param choices that did not participate in calculation of gram matrices: {:.2f}s'.format( | |||||
tt_poster)) | |||||
print('total training time with all hyper-param choices: {:.2f}s'.format( | |||||
tt_poster + np.sum(gram_matrix_time))) | |||||
if verbose: | |||||
print( | |||||
'training time with hyper-param choices that did not participate in calculation of gram matrices: {:.2f}s'.format( | |||||
tt_poster)) | |||||
print('total training time with all hyper-param choices: {:.2f}s'.format( | |||||
tt_poster + np.sum(gram_matrix_time))) | |||||
# str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\n'.format(average_gram_matrix_time, std_gram_matrix_time) | # str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\n'.format(average_gram_matrix_time, std_gram_matrix_time) | ||||
# str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt) | # str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt) | ||||
str_fw += 'training time with hyper-param choices that did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster) | str_fw += 'training time with hyper-param choices that did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster) | ||||
@@ -633,7 +670,8 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
keyorder = [ | keyorder = [ | ||||
'params', 'train_perf', 'valid_perf', 'test_perf' | 'params', 'train_perf', 'valid_perf', 'test_perf' | ||||
] | ] | ||||
print() | |||||
if verbose: | |||||
print() | |||||
tb_print = tabulate( | tb_print = tabulate( | ||||
OrderedDict( | OrderedDict( | ||||
sorted(table_dict.items(), | sorted(table_dict.items(), | ||||