diff --git a/notebooks/run_commonwalkkernel.ipynb b/notebooks/run_commonwalkkernel.ipynb index e529200..ea416cf 100644 --- a/notebooks/run_commonwalkkernel.ipynb +++ b/notebooks/run_commonwalkkernel.ipynb @@ -138,7 +138,8 @@ " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", " ds_name=ds['name'],\n", " n_jobs=multiprocessing.cpu_count(),\n", - " read_gm_from_file=False)\n", + " read_gm_from_file=False,\n", + " verbose=True)\n", " print()" ] } diff --git a/notebooks/run_commonwalkkernel.py b/notebooks/run_commonwalkkernel.py index d14d326..7030461 100644 --- a/notebooks/run_commonwalkkernel.py +++ b/notebooks/run_commonwalkkernel.py @@ -82,5 +82,6 @@ for ds in dslist: extra_params=(ds['extra_params'] if 'extra_params' in ds else None), ds_name=ds['name'], n_jobs=multiprocessing.cpu_count(), - read_gm_from_file=False) + read_gm_from_file=False, + verbose=True) print() \ No newline at end of file diff --git a/notebooks/run_marginalizedkernel.ipynb b/notebooks/run_marginalizedkernel.ipynb index db0e9d3..1a9748d 100644 --- a/notebooks/run_marginalizedkernel.ipynb +++ b/notebooks/run_marginalizedkernel.ipynb @@ -170,7 +170,8 @@ " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", " ds_name=ds['name'],\n", " n_jobs=multiprocessing.cpu_count(),\n", - " read_gm_from_file=False)\n", + " read_gm_from_file=False,\n", + " verbose=True)\n", " print()" ] } diff --git a/notebooks/run_marginalizedkernel.py b/notebooks/run_marginalizedkernel.py index e7b7ffd..93e3724 100644 --- a/notebooks/run_marginalizedkernel.py +++ b/notebooks/run_marginalizedkernel.py @@ -81,5 +81,6 @@ for ds in dslist: extra_params=(ds['extra_params'] if 'extra_params' in ds else None), ds_name=ds['name'], n_jobs=multiprocessing.cpu_count(), - read_gm_from_file=False) + read_gm_from_file=False, + verbose=True) print() diff --git a/notebooks/run_randomwalkkernel.ipynb b/notebooks/run_randomwalkkernel.ipynb index fc43bcc..0841d6c 100644 --- a/notebooks/run_randomwalkkernel.ipynb +++ b/notebooks/run_randomwalkkernel.ipynb @@ -23,14 +23,14 @@ "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5067.96it/s]\n" + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5308.25it/s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "../pygraph/kernels/randomWalkKernel.py:104: UserWarning: All labels are ignored.\n", + "../pygraph/kernels/randomWalkKernel.py:108: UserWarning: All labels are ignored.\n", " warnings.warn('All labels are ignored.')\n" ] }, @@ -38,132 +38,132 @@ "name": "stdout", "output_type": "stream", "text": [ - "calculating kernels: 16836it [00:00, 76328.32it/s]\n", + "calculating kernels: 16836it [00:00, 65408.89it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.38956499099731445 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.4157981872558594 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.1, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.1, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5928.69it/s]\n", - "calculating kernels: 16836it [00:00, 76159.65it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5205.09it/s]\n", + "calculating kernels: 16836it [00:00, 73715.56it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.3680381774902344 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.36714887619018555 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.01, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.01, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5599.99it/s]\n", - "calculating kernels: 16836it [00:00, 68122.63it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5344.96it/s]\n", + "calculating kernels: 16836it [00:00, 68817.65it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.37236690521240234 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.3666379451751709 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.001, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.001, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5444.75it/s]\n", - "calculating kernels: 16836it [00:00, 65554.92it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5295.73it/s]\n", + "calculating kernels: 16836it [00:00, 74865.49it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.37699007987976074 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.36979222297668457 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.0001, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 0.0001, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 4451.65it/s]\n", - "calculating kernels: 16836it [00:00, 64227.38it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5040.80it/s]\n", + "calculating kernels: 16836it [00:00, 70923.54it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.40868353843688965 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.3692610263824463 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-05, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-05, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5455.55it/s]\n", - "calculating kernels: 16836it [00:00, 69253.03it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5326.60it/s]\n", + "calculating kernels: 16836it [00:00, 73697.55it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.4017300605773926 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.37317800521850586 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-06, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-06, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5691.60it/s]\n", - "calculating kernels: 16836it [00:00, 67461.93it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5705.98it/s]\n", + "calculating kernels: 16836it [00:00, 64238.65it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.3712270259857178 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.36565732955932617 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-07, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-07, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5229.98it/s]\n", - "calculating kernels: 16836it [00:00, 70363.33it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 4833.15it/s]\n", + "calculating kernels: 16836it [00:00, 69971.77it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.37551283836364746 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.37798523902893066 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-08, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-08, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5023.84it/s]\n", - "calculating kernels: 16836it [00:00, 71457.21it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 4170.94it/s]\n", + "calculating kernels: 16836it [00:00, 64187.38it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.39424848556518555 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.39433860778808594 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-09, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-09, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5359.14it/s]\n", - "calculating kernels: 16836it [00:00, 73095.73it/s]\n", + "compute adjacency matrices: 100%|██████████| 183/183 [00:00<00:00, 5273.43it/s]\n", + "calculating kernels: 16836it [00:00, 69555.28it/s]\n", "\n", - " --- kernel matrix of random walk kernel of size 183 built in 0.37821507453918457 seconds ---\n", + " --- kernel matrix of random walk kernel of size 183 built in 0.3833920955657959 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-10, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'sylvester', 'weight': 1e-10, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", "10 gram matrices are calculated, 0 of which are ignored.\n", "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 30it [00:33, 1.10s/it]\n", + "cross validation: 30it [00:33, 1.11s/it]\n", "\n", "4. Getting final performance...\n", - "best_params_out: [{'compute_method': 'sylvester', 'weight': 0.01, 'n_jobs': 8}]\n", - "best_params_in: [{'alpha': 3.1622776601683795e-10}]\n", + "best_params_out: [{'compute_method': 'sylvester', 'weight': 0.01, 'n_jobs': 8, 'verbose': True}]\n", + "best_params_in: [{'alpha': 1e-10}]\n", "\n", - "best_val_perf: 31.894498817348637\n", - "best_val_std: 0.5235865101548381\n", - "final_performance: [32.04579601256704]\n", - "final_confidence: [3.9737533137212138]\n", - "train_performance: [30.830572265896325]\n", - "train_std: [0.44072573889937117]\n", + "best_val_perf: 31.76835551233969\n", + "best_val_std: 0.43269972907929183\n", + "final_performance: [32.391882524496765]\n", + "final_confidence: [2.6542337929023336]\n", + "train_performance: [30.70127313658435]\n", + "train_std: [0.31861204198126475]\n", "\n", - "time to calculate gram matrix with different hyper-params: 0.38±0.01s\n", - "time to calculate best gram matrix: 0.37±nans\n", - "total training time with all hyper-param choices: 40.27s\n", + "time to calculate gram matrix with different hyper-params: 0.38±0.02s\n", + "time to calculate best gram matrix: 0.37±0.00s\n", + "total training time with all hyper-param choices: 40.53s\n", "\n", "\n", "--- This is a regression problem ---\n", @@ -175,24 +175,30 @@ "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "reindex vertices: 100%|██████████| 183/183 [00:00<00:00, 26008.32it/s]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", - " keepdims=keepdims)\n", - "/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "calculating kernels: 12501it [00:01, 6173.52it/s]" + "reindex vertices: 100%|██████████| 183/183 [00:00<00:00, 28950.24it/s]\n", + "calculating kernels: 16836it [00:02, 6540.43it/s]\n", + "\n", + " --- kernel matrix of random walk kernel of size 183 built in 2.6675093173980713 seconds ---\n", + "\n", + "the gram matrix with parameters {'compute_method': 'conjugate', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'weight': 0.1, 'n_jobs': 8, 'verbose': True} is: \n", + "\n", + "\n", + "\n", + " None edge weight specified. Set all weight to 1.\n", + "\n", + "reindex vertices: 100%|██████████| 183/183 [00:00<00:00, 28019.19it/s]\n", + "calculating kernels: 16836it [00:02, 7963.48it/s]\n", + "\n", + " --- kernel matrix of random walk kernel of size 183 built in 2.2675061225891113 seconds ---\n", + "\n", + "the gram matrix with parameters {'compute_method': 'conjugate', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'weight': 0.01, 'n_jobs': 8, 'verbose': True} is: \n", + "\n", + "\n", + "\n", + " None edge weight specified. Set all weight to 1.\n", + "\n", + "reindex vertices: 100%|██████████| 183/183 [00:00<00:00, 23036.63it/s]\n", + "calculating kernels: 12801it [00:01, 8043.11it/s]" ] } ], @@ -299,7 +305,8 @@ " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", " ds_name=ds['name'],\n", " n_jobs=multiprocessing.cpu_count(),\n", - " read_gm_from_file=False)\n", + " read_gm_from_file=False,\n", + " verbose=True)\n", " print()" ] } diff --git a/notebooks/run_randomwalkkernel.py b/notebooks/run_randomwalkkernel.py index dbf5a99..381ba80 100644 --- a/notebooks/run_randomwalkkernel.py +++ b/notebooks/run_randomwalkkernel.py @@ -106,5 +106,6 @@ for ds in dslist: extra_params=(ds['extra_params'] if 'extra_params' in ds else None), ds_name=ds['name'], n_jobs=multiprocessing.cpu_count(), - read_gm_from_file=False) + read_gm_from_file=False, + verbose=True) print() \ No newline at end of file diff --git a/notebooks/run_spkernel.ipynb b/notebooks/run_spkernel.ipynb index 33ca48f..ad866f6 100644 --- a/notebooks/run_spkernel.ipynb +++ b/notebooks/run_spkernel.ipynb @@ -23,34 +23,34 @@ "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "getting sp graphs: 183it [00:00, 5345.48it/s]\n", - "calculating kernels: 16836it [00:01, 16066.90it/s]\n", + "getting sp graphs: 183it [00:00, 11704.68it/s]\n", + "calculating kernels: 16836it [00:00, 17085.14it/s]\n", "\n", - " --- shortest path kernel matrix of size 183 built in 1.2855160236358643 seconds ---\n", + " --- shortest path kernel matrix of size 183 built in 1.2640743255615234 seconds ---\n", "\n", - "the gram matrix with parameters {'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", "1 gram matrices are calculated, 0 of which are ignored.\n", "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 30it [00:03, 8.63it/s]\n", + "cross validation: 30it [00:03, 8.84it/s]\n", "\n", "4. Getting final performance...\n", - "best_params_out: [{'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8}]\n", - "best_params_in: [{'alpha': 0.0001}]\n", + "best_params_out: [{'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True}]\n", + "best_params_in: [{'alpha': 1e-10}]\n", "\n", - "best_val_perf: 9.674788994813262\n", - "best_val_std: 0.6229031522274688\n", - "final_performance: [9.590999824754439]\n", - "final_confidence: [2.911796096257332]\n", - "train_performance: [6.16594412531739]\n", - "train_std: [0.2739093211154806]\n", + "best_val_perf: 9.687399048018559\n", + "best_val_std: 0.654180946161292\n", + "final_performance: [9.411656660355659]\n", + "final_confidence: [2.500437167823725]\n", + "train_performance: [6.168480355249007]\n", + "train_std: [0.2541557651056269]\n", "\n", - "time to calculate gram matrix with different hyper-params: 1.29±nans\n", - "time to calculate best gram matrix: 1.29±nans\n", - "total training time with all hyper-param choices: 5.15s\n", + "time to calculate gram matrix with different hyper-params: 1.26±0.00s\n", + "time to calculate best gram matrix: 1.26±0.00s\n", + "total training time with all hyper-param choices: 5.09s\n", "\n", "\n", "\n", @@ -67,51 +67,35 @@ "\n", "\n", " 1 graphs are removed as they don't contain edges.\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", - " keepdims=keepdims)\n", - "/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "getting sp graphs: 149it [00:00, 6510.18it/s]\n", - "calculating kernels: 11175it [00:00, 18881.68it/s]\n", "\n", - " --- shortest path kernel matrix of size 149 built in 0.8007419109344482 seconds ---\n", + "getting sp graphs: 149it [00:00, 7096.72it/s]\n", + "calculating kernels: 11175it [00:00, 19504.73it/s]\n", "\n", - "the gram matrix with parameters {'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8} is: \n", + " --- shortest path kernel matrix of size 149 built in 0.7957959175109863 seconds ---\n", + "\n", + "the gram matrix with parameters {'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", "1 gram matrices are calculated, 0 of which are ignored.\n", "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 30it [00:02, 10.52it/s]\n", + "cross validation: 30it [00:02, 10.74it/s]\n", "\n", "4. Getting final performance...\n", - "best_params_out: [{'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8}]\n", - "best_params_in: [{'alpha': 3.162277660168379e-07}]\n", + "best_params_out: [{'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True}]\n", + "best_params_in: [{'alpha': 1e-05}]\n", "\n", - "best_val_perf: 8.784264102873752\n", - "best_val_std: 0.2656887278835053\n", - "final_performance: [8.059911355753659]\n", - "final_confidence: [1.9620843656589473]\n", - "train_performance: [7.8406202266920575]\n", - "train_std: [0.2177862360087283]\n", + "best_val_perf: 8.745832519261795\n", + "best_val_std: 0.26293501071192543\n", + "final_performance: [7.942686332248635]\n", + "final_confidence: [1.617779657027359]\n", + "train_performance: [7.860965083396337]\n", + "train_std: [0.16888913664254188]\n", "\n", - "time to calculate gram matrix with different hyper-params: 0.80±nans\n", - "time to calculate best gram matrix: 0.80±nans\n", - "total training time with all hyper-param choices: 4.02s\n", + "time to calculate gram matrix with different hyper-params: 0.80±0.00s\n", + "time to calculate best gram matrix: 0.80±0.00s\n", + "total training time with all hyper-param choices: 3.90s\n", "\n", "\n", "\n", @@ -126,34 +110,34 @@ "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "getting sp graphs: 68it [00:00, 1095.77it/s]\n", - "calculating kernels: 2346it [00:02, 813.63it/s]\n", + "getting sp graphs: 68it [00:00, 2292.58it/s]\n", + "calculating kernels: 2346it [00:02, 873.39it/s]\n", "\n", - " --- shortest path kernel matrix of size 68 built in 3.110588550567627 seconds ---\n", + " --- shortest path kernel matrix of size 68 built in 2.986046075820923 seconds ---\n", "\n", - "the gram matrix with parameters {'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", "1 gram matrices are calculated, 0 of which are ignored.\n", "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 30it [00:02, 10.97it/s]\n", + "cross validation: 30it [00:02, 11.85it/s]\n", "\n", "4. Getting final performance...\n", - "best_params_out: [{'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8}]\n", + "best_params_out: [{'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True}]\n", "best_params_in: [{'C': 3162.2776601683795}]\n", "\n", - "best_val_perf: 0.8798412698412699\n", - "best_val_std: 0.02062186442241262\n", - "final_performance: [0.9042857142857144]\n", - "final_confidence: [0.07343487734322982]\n", - "train_performance: [0.9709180695847363]\n", - "train_std: [0.005927396388634032]\n", + "best_val_perf: 0.8780158730158729\n", + "best_val_std: 0.028162670831398482\n", + "final_performance: [0.8714285714285714]\n", + "final_confidence: [0.09446318571439967]\n", + "train_performance: [0.9740729517396185]\n", + "train_std: [0.007872630412568218]\n", "\n", - "time to calculate gram matrix with different hyper-params: 3.11±nans\n", - "time to calculate best gram matrix: 3.11±nans\n", - "total training time with all hyper-param choices: 6.21s\n", + "time to calculate gram matrix with different hyper-params: 2.99±0.00s\n", + "time to calculate best gram matrix: 2.99±0.00s\n", + "total training time with all hyper-param choices: 5.93s\n", "\n", "\n", "\n", @@ -168,19 +152,8 @@ "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "getting sp graphs: 94it [00:00, 2190.46it/s]\n", - "calculating kernels: 4465it [00:05, 763.81it/s]\n", - "\n", - " --- shortest path kernel matrix of size 94 built in 6.083932399749756 seconds ---\n", - "\n", - "the gram matrix with parameters {'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8} is: \n", - "\n", - "\n", - "\n", - "1 gram matrices are calculated, 0 of which are ignored.\n", - "\n", - "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 0it [00:00, ?it/s]" + "getting sp graphs: 94it [00:00, 2131.93it/s]\n", + "calculating kernels: 1501it [00:01, 78.00it/s]" ] } ], @@ -264,7 +237,8 @@ " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", " ds_name=ds['name'],\n", " n_jobs=multiprocessing.cpu_count(),\n", - " read_gm_from_file=False)\n", + " read_gm_from_file=False,\n", + " verbose=True)\n", " print()" ] } diff --git a/notebooks/run_spkernel.py b/notebooks/run_spkernel.py index f4b9193..a8634d4 100644 --- a/notebooks/run_spkernel.py +++ b/notebooks/run_spkernel.py @@ -77,5 +77,6 @@ for ds in dslist: extra_params=(ds['extra_params'] if 'extra_params' in ds else None), ds_name=ds['name'], n_jobs=multiprocessing.cpu_count(), - read_gm_from_file=False) + read_gm_from_file=False, + verbose=True) print() \ No newline at end of file diff --git a/notebooks/run_structuralspkernel.ipynb b/notebooks/run_structuralspkernel.ipynb index 8d893b4..43f7c1c 100644 --- a/notebooks/run_structuralspkernel.ipynb +++ b/notebooks/run_structuralspkernel.ipynb @@ -23,34 +23,34 @@ "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "getting shortest paths: 183it [00:00, 5316.42it/s]\n", - "calculating kernels: 16836it [00:03, 4625.84it/s]\n", + "getting shortest paths: 183it [00:00, 5323.35it/s]\n", + "calculating kernels: 16836it [00:02, 5980.75it/s]\n", "\n", - " --- shortest path kernel matrix of size 183 built in 3.8611345291137695 seconds ---\n", + " --- shortest path kernel matrix of size 183 built in 3.0884954929351807 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", "1 gram matrices are calculated, 0 of which are ignored.\n", "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 30it [00:03, 8.71it/s]\n", + "cross validation: 30it [00:03, 8.90it/s]\n", "\n", "4. Getting final performance...\n", - "best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8}]\n", - "best_params_in: [{'alpha': 0.0031622776601683794}]\n", + "best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True}]\n", + "best_params_in: [{'alpha': 0.001}]\n", "\n", - "best_val_perf: 12.673707811197355\n", - "best_val_std: 0.8773195213759171\n", - "final_performance: [12.972668262063593]\n", - "final_confidence: [3.7642237202379087]\n", - "train_performance: [3.934708519599526]\n", - "train_std: [0.16225809646161615]\n", + "best_val_perf: 12.857015647214508\n", + "best_val_std: 0.8860388066269581\n", + "final_performance: [12.157314781928168]\n", + "final_confidence: [2.5739406086892296]\n", + "train_performance: [3.773093745028789]\n", + "train_std: [0.12430822644728814]\n", "\n", - "time to calculate gram matrix with different hyper-params: 3.86±nans\n", - "time to calculate best gram matrix: 3.86±nans\n", - "total training time with all hyper-param choices: 7.74s\n", + "time to calculate gram matrix with different hyper-params: 3.09±0.00s\n", + "time to calculate best gram matrix: 3.09±0.00s\n", + "total training time with all hyper-param choices: 6.84s\n", "\n", "\n", "\n", @@ -64,51 +64,35 @@ "2. Calculating gram matrices. This could take a while...\n", "\n", " None edge weight specified. Set all weight to 1.\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", - " keepdims=keepdims)\n", - "/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "getting shortest paths: 150it [00:00, 8822.07it/s]\n", - "calculating kernels: 11325it [00:02, 5167.04it/s]\n", "\n", - " --- shortest path kernel matrix of size 150 built in 2.394453525543213 seconds ---\n", + "getting shortest paths: 150it [00:00, 5191.83it/s]\n", + "calculating kernels: 11325it [00:01, 7143.18it/s]\n", "\n", - "the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8} is: \n", + " --- shortest path kernel matrix of size 150 built in 1.7898523807525635 seconds ---\n", + "\n", + "the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", "1 gram matrices are calculated, 0 of which are ignored.\n", "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 30it [00:02, 10.78it/s]\n", + "cross validation: 30it [00:02, 10.59it/s]\n", "\n", "4. Getting final performance...\n", - "best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8}]\n", + "best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8, 'verbose': True}]\n", "best_params_in: [{'alpha': 0.1}]\n", "\n", - "best_val_perf: 11.082918177885857\n", - "best_val_std: 0.3037589925734673\n", - "final_performance: [7.8261546009779925]\n", - "final_confidence: [1.59375970943081]\n", - "train_performance: [7.988630946761633]\n", - "train_std: [0.16054607648943253]\n", + "best_val_perf: 11.040598123045763\n", + "best_val_std: 0.31492017111536147\n", + "final_performance: [8.138193149138093]\n", + "final_confidence: [1.6238744767195439]\n", + "train_performance: [7.9412913127748235]\n", + "train_std: [0.18726339675217385]\n", "\n", - "time to calculate gram matrix with different hyper-params: 2.39±nans\n", - "time to calculate best gram matrix: 2.39±nans\n", - "total training time with all hyper-param choices: 5.49s\n", + "time to calculate gram matrix with different hyper-params: 1.79±0.00s\n", + "time to calculate best gram matrix: 1.79±0.00s\n", + "total training time with all hyper-param choices: 5.00s\n", "\n", "\n", "\n", @@ -123,61 +107,8 @@ "\n", " None edge weight specified. Set all weight to 1.\n", "\n", - "getting shortest paths: 68it [00:00, 567.53it/s]\n", - "calculating kernels: 2346it [00:14, 161.71it/s]\n", - "\n", - " --- shortest path kernel matrix of size 68 built in 14.833482265472412 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8} is: \n", - "\n", - "\n", - "\n", - "1 gram matrices are calculated, 0 of which are ignored.\n", - "\n", - "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 30it [00:02, 13.38it/s]\n", - "\n", - "4. Getting final performance...\n", - "best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8}]\n", - "best_params_in: [{'C': 1000.0}]\n", - "\n", - "best_val_perf: 0.9084126984126983\n", - "best_val_std: 0.027912022159840448\n", - "final_performance: [0.9085714285714286]\n", - "final_confidence: [0.0879511091875412]\n", - "train_performance: [0.9679438832772166]\n", - "train_std: [0.00754192133247499]\n", - "\n", - "time to calculate gram matrix with different hyper-params: 14.83±nans\n", - "time to calculate best gram matrix: 14.83±nans\n", - "total training time with all hyper-param choices: 17.42s\n", - "\n", - "\n", - "\n", - "PAH\n", - "\n", - "--- This is a classification problem ---\n", - "\n", - "\n", - "1. Loading dataset from file...\n", - "\n", - "2. Calculating gram matrices. This could take a while...\n", - "\n", - " None edge weight specified. Set all weight to 1.\n", - "\n", - "getting shortest paths: 94it [00:00, 447.28it/s]\n", - "calculating kernels: 4465it [01:04, 68.94it/s] \n", - "\n", - " --- shortest path kernel matrix of size 94 built in 65.20552921295166 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'node_kernels': {'symb': , 'nsymb': , 'mix': functools.partial(, , )}, 'n_jobs': 8} is: \n", - "\n", - "\n", - "\n", - "1 gram matrices are calculated, 0 of which are ignored.\n", - "\n", - "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 0it [00:00, ?it/s]" + "getting shortest paths: 68it [00:00, 536.19it/s]\n", + "calculating kernels: 0it [00:00, ?it/s]" ] } ], @@ -260,7 +191,8 @@ " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", " ds_name=ds['name'],\n", " n_jobs=multiprocessing.cpu_count(),\n", - " read_gm_from_file=False)\n", + " read_gm_from_file=False,\n", + " verbose=True)\n", " print()" ] } diff --git a/notebooks/run_structuralspkernel.py b/notebooks/run_structuralspkernel.py index aa4f562..1ceb07c 100644 --- a/notebooks/run_structuralspkernel.py +++ b/notebooks/run_structuralspkernel.py @@ -84,5 +84,6 @@ for ds in dslist: extra_params=(ds['extra_params'] if 'extra_params' in ds else None), ds_name=ds['name'], n_jobs=multiprocessing.cpu_count(), - read_gm_from_file=False) + read_gm_from_file=False, + verbose=True) print() \ No newline at end of file diff --git a/notebooks/run_untilhpathkernel.ipynb b/notebooks/run_untilhpathkernel.ipynb index 4931288..694d632 100644 --- a/notebooks/run_untilhpathkernel.ipynb +++ b/notebooks/run_untilhpathkernel.ipynb @@ -20,203 +20,187 @@ "1. Loading dataset from file...\n", "\n", "2. Calculating gram matrices. This could take a while...\n", - "getting paths: 183it [00:00, 33583.79it/s]\n", - "calculating kernels: 16836it [00:00, 382919.33it/s]\n", + "getting paths: 183it [00:00, 22697.39it/s]\n", + "calculating kernels: 16836it [00:00, 371524.56it/s]\n", "\n", - " --- kernel matrix of path kernel up to 1 of size 183 built in 0.28138017654418945 seconds ---\n", + " --- kernel matrix of path kernel up to 1 of size 183 built in 0.27962422370910645 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 49932.19it/s]\n", - "calculating kernels: 16836it [00:00, 339040.24it/s]\n", + "getting paths: 183it [00:00, 35988.26it/s]\n", + "calculating kernels: 16836it [00:00, 444708.75it/s]\n", "\n", - " --- kernel matrix of path kernel up to 1 of size 183 built in 0.2915959358215332 seconds ---\n", + " --- kernel matrix of path kernel up to 1 of size 183 built in 0.284440279006958 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 13100.71it/s]\n", - "calculating kernels: 16836it [00:00, 195915.25it/s]\n", + "getting paths: 183it [00:00, 26474.81it/s]\n", + "calculating kernels: 16836it [00:00, 215084.65it/s]\n", "\n", - " --- kernel matrix of path kernel up to 2 of size 183 built in 0.39291882514953613 seconds ---\n", + " --- kernel matrix of path kernel up to 2 of size 183 built in 0.2832369804382324 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 15186.23it/s]\n", - "calculating kernels: 16836it [00:00, 216679.82it/s]\n", + "getting paths: 183it [00:00, 18360.43it/s]\n", + "calculating kernels: 16836it [00:00, 254309.18it/s]\n", "\n", - " --- kernel matrix of path kernel up to 2 of size 183 built in 0.2922053337097168 seconds ---\n", + " --- kernel matrix of path kernel up to 2 of size 183 built in 0.28844165802001953 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 8410.48it/s]\n", - "calculating kernels: 16836it [00:00, 146690.73it/s]\n", + "getting paths: 183it [00:00, 8687.30it/s]\n", + "calculating kernels: 16836it [00:00, 168741.96it/s]\n", "\n", - " --- kernel matrix of path kernel up to 3 of size 183 built in 0.3915746212005615 seconds ---\n", + " --- kernel matrix of path kernel up to 3 of size 183 built in 0.38907885551452637 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 13951.28it/s]\n", - "calculating kernels: 16836it [00:00, 201673.88it/s]\n", + "getting paths: 183it [00:00, 11379.65it/s]\n", + "calculating kernels: 16836it [00:00, 195770.23it/s]\n", "\n", - " --- kernel matrix of path kernel up to 3 of size 183 built in 0.3854410648345947 seconds ---\n", + " --- kernel matrix of path kernel up to 3 of size 183 built in 0.39213061332702637 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 10054.46it/s]\n", - "calculating kernels: 16836it [00:00, 70713.10it/s]\n", + "getting paths: 183it [00:00, 8062.50it/s]\n", + "calculating kernels: 16836it [00:00, 72349.59it/s]\n", "\n", - " --- kernel matrix of path kernel up to 4 of size 183 built in 0.48105573654174805 seconds ---\n", + " --- kernel matrix of path kernel up to 4 of size 183 built in 0.512467622756958 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 1418.94it/s]\n", - "calculating kernels: 16836it [00:00, 144898.57it/s]\n", + "getting paths: 183it [00:00, 10578.68it/s]\n", + "calculating kernels: 16836it [00:00, 133704.13it/s]\n", "\n", - " --- kernel matrix of path kernel up to 4 of size 183 built in 0.5477819442749023 seconds ---\n", + " --- kernel matrix of path kernel up to 4 of size 183 built in 0.3866546154022217 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 15604.25it/s]\n", - "calculating kernels: 16836it [00:00, 103300.82it/s]\n", + "getting paths: 183it [00:00, 9220.91it/s]\n", + "calculating kernels: 16836it [00:00, 98386.86it/s] \n", "\n", - " --- kernel matrix of path kernel up to 5 of size 183 built in 0.3788299560546875 seconds ---\n", + " --- kernel matrix of path kernel up to 5 of size 183 built in 0.38112974166870117 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 9795.27it/s]\n", - "calculating kernels: 16836it [00:00, 121689.68it/s]\n", + "getting paths: 183it [00:00, 8493.03it/s]\n", + "calculating kernels: 16836it [00:00, 119698.11it/s]\n", "\n", - " --- kernel matrix of path kernel up to 5 of size 183 built in 0.3888108730316162 seconds ---\n", + " --- kernel matrix of path kernel up to 5 of size 183 built in 0.38007307052612305 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 7163.19it/s]\n", - "calculating kernels: 16836it [00:00, 89262.17it/s]\n", + "getting paths: 183it [00:00, 7385.55it/s]\n", + "calculating kernels: 16836it [00:00, 88347.09it/s]\n", "\n", - " --- kernel matrix of path kernel up to 6 of size 183 built in 0.39624905586242676 seconds ---\n", + " --- kernel matrix of path kernel up to 6 of size 183 built in 0.3929023742675781 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 16751.59it/s]\n", - "calculating kernels: 16836it [00:00, 100004.39it/s]\n", + "getting paths: 183it [00:00, 5394.24it/s]\n", + "calculating kernels: 16836it [00:00, 100946.78it/s]\n", "\n", - " --- kernel matrix of path kernel up to 6 of size 183 built in 0.388913631439209 seconds ---\n", + " --- kernel matrix of path kernel up to 6 of size 183 built in 0.3824801445007324 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 10090.81it/s]\n", - "calculating kernels: 16836it [00:00, 91172.28it/s] \n", + "getting paths: 183it [00:00, 12457.52it/s]\n", + "calculating kernels: 16836it [00:00, 68995.02it/s]\n", "\n", - " --- kernel matrix of path kernel up to 7 of size 183 built in 0.4908461570739746 seconds ---\n", + " --- kernel matrix of path kernel up to 7 of size 183 built in 0.49313783645629883 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 2997.78it/s]\n", - "calculating kernels: 16836it [00:00, 104945.65it/s]\n", + "getting paths: 183it [00:00, 2829.00it/s]\n", + "calculating kernels: 16836it [00:00, 105515.66it/s]\n", "\n", - " --- kernel matrix of path kernel up to 7 of size 183 built in 0.36611366271972656 seconds ---\n", + " --- kernel matrix of path kernel up to 7 of size 183 built in 0.35750555992126465 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 6353.90it/s]\n", - "calculating kernels: 16836it [00:00, 80425.25it/s]\n", + "getting paths: 183it [00:00, 7427.43it/s]\n", + "calculating kernels: 16836it [00:00, 81607.79it/s]\n", "\n", - " --- kernel matrix of path kernel up to 8 of size 183 built in 0.5061323642730713 seconds ---\n", + " --- kernel matrix of path kernel up to 8 of size 183 built in 0.4937615394592285 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 9427.60it/s]\n", - "calculating kernels: 16836it [00:00, 93863.88it/s] \n", + "getting paths: 183it [00:00, 5660.08it/s]\n", + "calculating kernels: 16836it [00:00, 90014.85it/s]\n", "\n", - " --- kernel matrix of path kernel up to 8 of size 183 built in 0.3872077465057373 seconds ---\n", + " --- kernel matrix of path kernel up to 8 of size 183 built in 0.36504673957824707 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 7575.20it/s]\n", - "calculating kernels: 16836it [00:00, 82517.07it/s]\n", + "getting paths: 183it [00:00, 7548.83it/s]\n", + "calculating kernels: 16836it [00:00, 79498.55it/s]\n", "\n", - " --- kernel matrix of path kernel up to 9 of size 183 built in 0.48129963874816895 seconds ---\n", + " --- kernel matrix of path kernel up to 9 of size 183 built in 0.47993040084838867 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 6563.74it/s]\n", - "calculating kernels: 16836it [00:00, 94045.02it/s] \n", + "getting paths: 183it [00:00, 7319.90it/s]\n", + "calculating kernels: 16836it [00:00, 92310.24it/s]\n", "\n", - " --- kernel matrix of path kernel up to 9 of size 183 built in 0.39592933654785156 seconds ---\n", + " --- kernel matrix of path kernel up to 9 of size 183 built in 0.3970515727996826 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 6069.81it/s]\n", - "calculating kernels: 16836it [00:00, 77447.83it/s]\n", + "getting paths: 183it [00:00, 8318.60it/s]\n", + "calculating kernels: 16836it [00:00, 89934.38it/s] \n", "\n", - " --- kernel matrix of path kernel up to 10 of size 183 built in 0.47420382499694824 seconds ---\n", + " --- kernel matrix of path kernel up to 10 of size 183 built in 0.4861469268798828 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 183it [00:00, 9481.17it/s]\n", - "calculating kernels: 16836it [00:00, 58253.60it/s]\n", + "getting paths: 183it [00:00, 2635.72it/s]\n", + "calculating kernels: 16836it [00:00, 90123.30it/s]\n", "\n", - " --- kernel matrix of path kernel up to 10 of size 183 built in 0.4869115352630615 seconds ---\n", + " --- kernel matrix of path kernel up to 10 of size 183 built in 0.367603063583374 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", "\n", "20 gram matrices are calculated, 0 of which are ignored.\n", "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 30it [01:07, 1.10s/it]\n", + "cross validation: 30it [01:06, 1.11s/it]\n", "\n", "4. Getting final performance...\n", - "best_params_out: [{'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8}]\n", + "best_params_out: [{'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True}]\n", "best_params_in: [{'alpha': 0.01}]\n", "\n", - "best_val_perf: 6.8347760734601675\n", - "best_val_std: 0.26187601855914455\n", - "final_performance: [6.844597847292873]\n", - "final_confidence: [1.3282917788841784]\n", - "train_performance: [2.2886614412566524]\n", - "train_std: [0.11697823620293107]\n", + "best_val_perf: 6.842702754673377\n", + "best_val_std: 0.3600238142615252\n", + "final_performance: [7.557191252340816]\n", + "final_confidence: [2.5849069582911595]\n", + "train_performance: [2.276370048287339]\n", + "train_std: [0.13830866732067562]\n", + "\n", + "time to calculate gram matrix with different hyper-params: 0.39±0.07s\n", + "time to calculate best gram matrix: 0.28±0.00s\n", + "total training time with all hyper-param choices: 79.82s\n", "\n", - "time to calculate gram matrix with different hyper-params: 0.41±0.07s\n", - "time to calculate best gram matrix: 0.39±nans\n", - "total training time with all hyper-param choices: 82.00s\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", - " keepdims=keepdims)\n", - "/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ "\n", "\n", "Alkane\n", @@ -227,171 +211,44 @@ "1. Loading dataset from file...\n", "\n", "2. Calculating gram matrices. This could take a while...\n", - "getting paths: 150it [00:00, 38060.83it/s]\n", - "calculating kernels: 11325it [00:00, 447307.64it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 1 of size 150 built in 0.29852986335754395 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 16860.39it/s]\n", - "calculating kernels: 11325it [00:00, 522115.40it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 1 of size 150 built in 0.29816317558288574 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 18149.30it/s]\n", - "calculating kernels: 11325it [00:00, 383173.55it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 2 of size 150 built in 0.29796385765075684 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 23172.10it/s]\n", - "calculating kernels: 11325it [00:00, 427074.37it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 2 of size 150 built in 0.3109288215637207 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 13243.78it/s]\n", - "calculating kernels: 11325it [00:00, 269283.28it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 3 of size 150 built in 0.29997825622558594 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 20671.78it/s]\n", - "calculating kernels: 11325it [00:00, 312080.29it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 3 of size 150 built in 0.29572534561157227 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 3.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 15393.45it/s]\n", - "calculating kernels: 11325it [00:00, 362928.87it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 4 of size 150 built in 0.30132484436035156 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 16957.65it/s]\n", - "calculating kernels: 11325it [00:00, 84503.61it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 4 of size 150 built in 0.4178507328033447 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 4.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 14440.54it/s]\n", - "calculating kernels: 11325it [00:00, 276894.92it/s]\n", + "getting paths: 150it [00:00, 31366.32it/s]\n", + "calculating kernels: 11325it [00:00, 509820.58it/s]\n", "\n", - " --- kernel matrix of path kernel up to 5 of size 150 built in 0.29338693618774414 seconds ---\n", + " --- kernel matrix of path kernel up to 1 of size 150 built in 0.29791831970214844 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 150it [00:00, 15619.30it/s]\n", - "calculating kernels: 11325it [00:00, 254676.58it/s]\n", + "getting paths: 150it [00:00, 30330.50it/s]\n", + "calculating kernels: 11325it [00:00, 655613.27it/s]\n", "\n", - " --- kernel matrix of path kernel up to 5 of size 150 built in 0.29663729667663574 seconds ---\n", + " --- kernel matrix of path kernel up to 1 of size 150 built in 0.29232001304626465 seconds ---\n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 5.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 1.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", "\n", - "getting paths: 150it [00:00, 14585.84it/s]\n", - "calculating kernels: 11325it [00:00, 242964.30it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 6 of size 150 built in 0.29677391052246094 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 11555.83it/s]\n", - "calculating kernels: 11325it [00:00, 330949.31it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 6 of size 150 built in 0.2948622703552246 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 6.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 10424.60it/s]\n", - "calculating kernels: 11325it [00:00, 238514.96it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 7 of size 150 built in 0.3041496276855469 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 12318.80it/s]\n", - "calculating kernels: 11325it [00:00, 251979.97it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 7 of size 150 built in 0.3013496398925781 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 7.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 10722.00it/s]\n", - "calculating kernels: 11325it [00:00, 232363.74it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 8 of size 150 built in 0.294144868850708 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 18965.59it/s]\n", - "calculating kernels: 11325it [00:00, 424638.55it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 8 of size 150 built in 0.2961091995239258 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 8.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 13945.38it/s]\n", - "calculating kernels: 11325it [00:00, 286344.19it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 9 of size 150 built in 0.30029296875 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 9525.87it/s]\n", - "calculating kernels: 11325it [00:00, 231776.43it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 9 of size 150 built in 0.29835057258605957 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 9.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", - "\n", - "\n", - "getting paths: 150it [00:00, 16916.15it/s]\n", - "calculating kernels: 11325it [00:00, 85396.60it/s]\n", - "\n", - " --- kernel matrix of path kernel up to 10 of size 150 built in 0.42621588706970215 seconds ---\n", - "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'MinMax', 'n_jobs': 8} is: \n", - "\n", + "getting paths: 150it [00:00, 27568.71it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calculating kernels: 11325it [00:00, 780628.98it/s]\n", "\n", - "getting paths: 150it [00:00, 16108.40it/s]\n", - "calculating kernels: 11325it [00:00, 328896.12it/s]\n", + " --- kernel matrix of path kernel up to 2 of size 150 built in 0.2590019702911377 seconds ---\n", "\n", - " --- kernel matrix of path kernel up to 10 of size 150 built in 0.301084041595459 seconds ---\n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'MinMax', 'n_jobs': 8, 'verbose': True} is: \n", "\n", - "the gram matrix with parameters {'compute_method': 'trie', 'depth': 10.0, 'k_func': 'tanimoto', 'n_jobs': 8} is: \n", "\n", + "getting paths: 150it [00:00, 17554.29it/s]\n", + "calculating kernels: 11325it [00:00, 320784.55it/s]\n", "\n", + " --- kernel matrix of path kernel up to 2 of size 150 built in 0.3091611862182617 seconds ---\n", "\n", - "20 gram matrices are calculated, 0 of which are ignored.\n", + "the gram matrix with parameters {'compute_method': 'trie', 'depth': 2.0, 'k_func': 'tanimoto', 'n_jobs': 8, 'verbose': True} is: \n", "\n", - "3. Fitting and predicting using nested cross validation. This could really take a while...\n", - "cross validation: 0it [00:00, ?it/s]" + "\n" ] } ], @@ -472,7 +329,8 @@ " extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", " ds_name=ds['name'],\n", " n_jobs=multiprocessing.cpu_count(),\n", - " read_gm_from_file=False)\n", + " read_gm_from_file=False,\n", + " verbose=True)\n", " print()" ] }, diff --git a/notebooks/run_untilhpathkernel.py b/notebooks/run_untilhpathkernel.py index 401dbe4..6d36066 100644 --- a/notebooks/run_untilhpathkernel.py +++ b/notebooks/run_untilhpathkernel.py @@ -80,5 +80,6 @@ for ds in dslist: extra_params=(ds['extra_params'] if 'extra_params' in ds else None), ds_name=ds['name'], n_jobs=multiprocessing.cpu_count(), - read_gm_from_file=False) + read_gm_from_file=False, + verbose=True) print() \ No newline at end of file diff --git a/preimage/pathfrequency.py b/preimage/pathfrequency.py index 00f9674..3bca1bc 100644 --- a/preimage/pathfrequency.py +++ b/preimage/pathfrequency.py @@ -6,23 +6,196 @@ Created on Wed Mar 20 10:12:15 2019 inferring a graph grom path frequency. @author: ljia """ +#import numpy as np +import networkx as nx +from scipy.spatial.distance import hamming +import itertools def SISF(K, v): if output: return output else: return 'no solution' + def SISF_M(K, v): return output -def GIPF_tree(K, v): + +def GIPF_tree(v_obj, K=1, alphabet=[0, 1]): if K == 1: - pass - if G: - return G - else: - return 'no solution' + n_graph = v_obj[0] + v_obj[1] + D_T, father_idx = getDynamicTable(n_graph, alphabet) + + # get the vector the closest to v_obj. + if v_obj not in D_T: + print('no exact solution') + dis_lim = 1 / len(v_obj) # the possible shortest distance. + dis_min = 1.0 # minimum proportional distance + v_min = v_obj + for vc in D_T: + if vc[0] + vc[1] == n_graph: +# print(vc) + dis = hamming(vc, v_obj) + if dis < dis_min: + dis_min = dis + v_min = vc + if dis_min <= dis_lim: + break + v_obj = v_min + + # obtain required graph by traceback procedure. + return getObjectGraph(v_obj, D_T, father_idx, alphabet), v_obj def GIPF_M(K, v): - return G \ No newline at end of file + return G + + +def getDynamicTable(n_graph, alphabet=[0, 1]): + # init. When only one node exists. + D_T = {(1, 0, 0, 0, 0, 0): 1, (0, 1, 0, 0, 0, 0): 1, (0, 0, 1, 0, 0, 0): 0, + (0, 0, 0, 1, 0, 0): 0, (0, 0, 0, 0, 1, 0): 0, (0, 0, 0, 0, 0, 1): 0,} + D_T = [(1, 0, 0, 0, 0, 0), (0, 1, 0, 0, 0, 0)] + father_idx = [-1, -1] # index of each vector's father + # add possible vectors. + for idx, v in enumerate(D_T): + if v[0] + v[1] < n_graph: + D_T.append((v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5])) + D_T.append((v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5])) + D_T.append((v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5])) + D_T.append((v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2)) + father_idx += [idx, idx, idx, idx] + +# D_T = itertools.chain([(1, 0, 0, 0, 0, 0)], [(0, 1, 0, 0, 0, 0)]) +# father_idx = itertools.chain([-1], [-1]) # index of each vector's father +# # add possible vectors. +# for idx, v in enumerate(D_T): +# if v[0] + v[1] < n_graph: +# D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5])]) +# D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5])]) +# D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5])]) +# D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2)]) +# father_idx = itertools.chain(father_idx, [idx, idx, idx, idx]) + return D_T, father_idx + + +def getObjectGraph(v_obj, D_T, father_idx, alphabet=[0, 1]): + g_obj = nx.Graph() + + # do vector traceback. + v_tb = [list(v_obj)] # traceback vectors. + v_tb_idx = [D_T.index(v_obj)] # indices of traceback vectors. + while v_tb_idx[-1] > 1: + idx_pre = father_idx[v_tb_idx[-1]] + v_tb_idx.append(idx_pre) + v_tb.append(list(D_T[idx_pre])) + v_tb = v_tb[::-1] # reverse +# v_tb_idx = v_tb_idx[::-1] + + # construct tree. + v_c = v_tb[0] # current vector. + if v_c[0] == 1: + g_obj.add_node(0, node_label=alphabet[0]) + else: + g_obj.add_node(0, node_label=alphabet[1]) + for vct in v_tb[1:]: + if vct[0] - v_c[0] == 1: + if vct[2] - v_c[2] == 2: # transfer 1 + label1 = alphabet[0] + label2 = alphabet[0] + else: # transfer 2 + label1 = alphabet[1] + label2 = alphabet[0] + else: + if vct[3] - v_c[3] == 1: # transfer 3 + label1 = alphabet[0] + label2 = alphabet[1] + else: # transfer 4 + label1 = alphabet[1] + label2 = alphabet[1] + for nd, attr in g_obj.nodes(data=True): + if attr['node_label'] == label1: + nb_node = nx.number_of_nodes(g_obj) + g_obj.add_node(nb_node, node_label=label2) + g_obj.add_edge(nd, nb_node) + break + v_c = vct + return g_obj + + +import random +def hierarchy_pos(G, root=None, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5): + + ''' + From Joel's answer at https://stackoverflow.com/a/29597209/2966723. + Licensed under Creative Commons Attribution-Share Alike + + If the graph is a tree this will return the positions to plot this in a + hierarchical layout. + + G: the graph (must be a tree) + + root: the root node of current branch + - if the tree is directed and this is not given, + the root will be found and used + - if the tree is directed and this is given, then + the positions will be just for the descendants of this node. + - if the tree is undirected and not given, + then a random choice will be used. + + width: horizontal space allocated for this branch - avoids overlap with other branches + + vert_gap: gap between levels of hierarchy + + vert_loc: vertical location of root + + xcenter: horizontal location of root + ''' + if not nx.is_tree(G): + raise TypeError('cannot use hierarchy_pos on a graph that is not a tree') + + if root is None: + if isinstance(G, nx.DiGraph): + root = next(iter(nx.topological_sort(G))) #allows back compatibility with nx version 1.11 + else: + root = random.choice(list(G.nodes)) + + def _hierarchy_pos(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5, pos = None, parent = None): + ''' + see hierarchy_pos docstring for most arguments + + pos: a dict saying where all nodes go if they have been assigned + parent: parent of this branch. - only affects it if non-directed + + ''' + + if pos is None: + pos = {root:(xcenter,vert_loc)} + else: + pos[root] = (xcenter, vert_loc) + children = list(G.neighbors(root)) + if not isinstance(G, nx.DiGraph) and parent is not None: + children.remove(parent) + if len(children)!=0: + dx = width/len(children) + nextx = xcenter - width/2 - dx/2 + for child in children: + nextx += dx + pos = _hierarchy_pos(G,child, width = dx, vert_gap = vert_gap, + vert_loc = vert_loc-vert_gap, xcenter=nextx, + pos=pos, parent = root) + return pos + + + return _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter) + + +if __name__ == '__main__': + v_obj = (6, 4, 10, 3, 3, 2) +# v_obj = (6, 5, 10, 3, 3, 2) + tree_obj, v_obj = GIPF_tree(v_obj) + print('One closest vector is', v_obj) + # plot + pos = hierarchy_pos(tree_obj, 0) + node_labels = nx.get_node_attributes(tree_obj, 'node_label') + nx.draw(tree_obj, pos=pos, labels=node_labels, with_labels=True) \ No newline at end of file diff --git a/pygraph/kernels/commonWalkKernel.py b/pygraph/kernels/commonWalkKernel.py index 2efe431..5186124 100644 --- a/pygraph/kernels/commonWalkKernel.py +++ b/pygraph/kernels/commonWalkKernel.py @@ -26,7 +26,8 @@ def commonwalkkernel(*args, n=None, weight=1, compute_method=None, - n_jobs=None): + n_jobs=None, + verbose=True): """Calculate common walk graph kernels between graphs. Parameters ---------- @@ -71,8 +72,9 @@ def commonwalkkernel(*args, idx = [G[0] for G in Gn] Gn = [G[1] for G in Gn] if len(Gn) != len_gn: - print('\n %d graphs are removed as they have only 1 node.\n' % - (len_gn - len(Gn))) + if verbose: + print('\n %d graphs are removed as they have only 1 node.\n' % + (len_gn - len(Gn))) ds_attrs = get_dataset_attributes( Gn, @@ -102,7 +104,7 @@ def commonwalkkernel(*args, elif compute_method == 'geo': do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(Gn,), n_jobs=n_jobs) + glbv=(Gn,), n_jobs=n_jobs, verbose=verbose) # pool = Pool(n_jobs) @@ -167,9 +169,9 @@ def commonwalkkernel(*args, # Kmatrix[j][i] = Kmatrix[i][j] run_time = time.time() - start_time - print( - "\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---" - % (len(Gn), run_time)) + if verbose: + print("\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---" + % (len(Gn), run_time)) return Kmatrix, run_time, idx diff --git a/pygraph/kernels/randomWalkKernel.py b/pygraph/kernels/randomWalkKernel.py index ae510ed..9424994 100644 --- a/pygraph/kernels/randomWalkKernel.py +++ b/pygraph/kernels/randomWalkKernel.py @@ -32,7 +32,8 @@ def randomwalkkernel(*args, edge_label='bond_type', # params for spectral method. sub_kernel=None, - n_jobs=None): + n_jobs=None, + verbose=True): """Calculate random walk graph kernels. Parameters ---------- @@ -60,7 +61,8 @@ def randomwalkkernel(*args, eweight = None if edge_weight == None: - print('\n None edge weight specified. Set all weight to 1.\n') + if verbose: + print('\n None edge weight specified. Set all weight to 1.\n') else: try: some_weight = list( @@ -68,13 +70,13 @@ def randomwalkkernel(*args, if isinstance(some_weight, float) or isinstance(some_weight, int): eweight = edge_weight else: - print( - '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' - % edge_weight) + if verbose: + print('\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' + % edge_weight) except: - print( - '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' - % edge_weight) + if verbose: + print('\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' + % edge_weight) ds_attrs = get_dataset_attributes( Gn, @@ -90,8 +92,9 @@ def randomwalkkernel(*args, idx = [G[0] for G in Gn] Gn = [G[1] for G in Gn] if len(Gn) != len_gn: - print('\n %d graphs are removed as they don\'t contain edges.\n' % - (len_gn - len(Gn))) + if verbose: + print('\n %d graphs are removed as they don\'t contain edges.\n' % + (len_gn - len(Gn))) start_time = time.time() @@ -100,26 +103,30 @@ def randomwalkkernel(*args, # gmf = filterGramMatrix(A_wave_list[0], label_list[0], ('C', '0', 'O'), ds_attrs['is_directed']) if compute_method == 'sylvester': - import warnings - warnings.warn('All labels are ignored.') - Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs) + if verbose: + import warnings + warnings.warn('All labels are ignored.') + Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs, verbose=verbose) elif compute_method == 'conjugate': - Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs, - node_kernels, edge_kernels, - node_label, edge_label, eweight, n_jobs) + Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs, node_kernels, + edge_kernels, node_label, edge_label, + eweight, n_jobs, verbose=verbose) elif compute_method == 'fp': Kmatrix = _fixed_point(Gn, weight, p, q, ds_attrs, node_kernels, - edge_kernels, node_label, edge_label, - eweight, n_jobs) + edge_kernels, node_label, edge_label, + eweight, n_jobs, verbose=verbose) elif compute_method == 'spectral': - import warnings - warnings.warn('All labels are ignored. Only works for undirected graphs.') - Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs) + if verbose: + import warnings + warnings.warn('All labels are ignored. Only works for undirected graphs.') + Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel, + eweight, n_jobs, verbose=verbose) elif compute_method == 'kron': + pass for i in range(0, len(Gn)): for j in range(i, len(Gn)): Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j], @@ -131,15 +138,15 @@ def randomwalkkernel(*args, ) run_time = time.time() - start_time - print( - "\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---" - % (len(Gn), run_time)) + if verbose: + print("\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---" + % (len(Gn), run_time)) return Kmatrix, run_time, idx ############################################################################### -def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs): +def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, verbose=True): """Calculate walk graph kernels up to n between 2 graphs using Sylvester method. Parameters @@ -162,8 +169,9 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs): # don't normalize adjacency matrices if q is a uniform vector. Note # A_wave_list accually contains the transposes of the adjacency matrices. A_wave_list = [ - nx.adjacency_matrix(G, eweight).todense().transpose() for G in tqdm( - Gn, desc='compute adjacency matrices', file=sys.stdout) + nx.adjacency_matrix(G, eweight).todense().transpose() for G in + (tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) if + verbose else Gn) ] # # normalized adjacency matrices # A_wave_list = [] @@ -178,7 +186,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs): G_Awl = Awl_toshare do_partial = partial(wrapper_se_do, lmda) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(A_wave_list,), n_jobs=n_jobs) + glbv=(A_wave_list,), n_jobs=n_jobs, verbose=verbose) # pbar = tqdm( # total=(1 + len(Gn)) * len(Gn) / 2, @@ -226,7 +234,7 @@ def _se_do(A_wave1, A_wave2, lmda): ############################################################################### def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, - node_label, edge_label, eweight, n_jobs): + node_label, edge_label, eweight, n_jobs, verbose=True): """Calculate walk graph kernels up to n between 2 graphs using conjugate method. Parameters @@ -265,8 +273,8 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, # else: # reindex nodes using consecutive integers for convenience of kernel calculation. Gn = [nx.convert_node_labels_to_integers( - g, first_label=0, label_attribute='label_orignal') for g in tqdm( - Gn, desc='reindex vertices', file=sys.stdout)] + g, first_label=0, label_attribute='label_orignal') for g in (tqdm( + Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] if p == None and q == None: # p and q are uniform distributions as default. def init_worker(gn_toshare): @@ -275,7 +283,7 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(Gn,), n_jobs=n_jobs) + glbv=(Gn,), n_jobs=n_jobs, verbose=verbose) # pbar = tqdm( # total=(1 + len(Gn)) * len(Gn) / 2, @@ -341,7 +349,7 @@ def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, ############################################################################### def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, - node_label, edge_label, eweight, n_jobs): + node_label, edge_label, eweight, n_jobs, verbose=True): """Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method. Parameters @@ -393,8 +401,8 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, # else: # reindex nodes using consecutive integers for convenience of kernel calculation. Gn = [nx.convert_node_labels_to_integers( - g, first_label=0, label_attribute='label_orignal') for g in tqdm( - Gn, desc='reindex vertices', file=sys.stdout)] + g, first_label=0, label_attribute='label_orignal') for g in (tqdm( + Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] if p == None and q == None: # p and q are uniform distributions as default. def init_worker(gn_toshare): @@ -403,7 +411,7 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(Gn,), n_jobs=n_jobs) + glbv=(Gn,), n_jobs=n_jobs, verbose=verbose) return Kmatrix @@ -445,7 +453,7 @@ def func_fp(x, p_times, lmda, w_times): ############################################################################### -def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs): +def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, verbose=True): """Calculate walk graph kernels up to n between 2 unlabeled graphs using spectral decomposition method. Labels will be ignored. @@ -469,7 +477,8 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs): # precompute the spectral decomposition of each graph. P_list = [] D_list = [] - for G in tqdm(Gn, desc='spectral decompose', file=sys.stdout): + for G in (tqdm(Gn, desc='spectral decompose', file=sys.stdout) if + verbose else Gn): # don't normalize adjacency matrices if q is a uniform vector. Note # A accually is the transpose of the adjacency matrix. A = nx.adjacency_matrix(G, eweight).todense().transpose() @@ -488,7 +497,8 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs): G_D = D_toshare do_partial = partial(wrapper_sd_do, weight, sub_kernel) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs) + glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs, + verbose=verbose) # pbar = tqdm( diff --git a/pygraph/kernels/spKernel.py b/pygraph/kernels/spKernel.py index 1a2bd82..34724e2 100644 --- a/pygraph/kernels/spKernel.py +++ b/pygraph/kernels/spKernel.py @@ -56,7 +56,8 @@ def spkernel(*args, Gn = args[0] if len(args) == 1 else [args[0], args[1]] weight = None if edge_weight is None: - print('\n None edge weight specified. Set all weight to 1.\n') + if verbose: + print('\n None edge weight specified. Set all weight to 1.\n') else: try: some_weight = list( @@ -64,13 +65,15 @@ def spkernel(*args, if isinstance(some_weight, (float, int)): weight = edge_weight else: - print( - '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' - % edge_weight) + if verbose: + print( + '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' + % edge_weight) except: - print( - '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' - % edge_weight) + if verbose: + print( + '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' + % edge_weight) ds_attrs = get_dataset_attributes( Gn, attr_names=['node_labeled', 'node_attr_dim', 'is_directed'], @@ -83,8 +86,9 @@ def spkernel(*args, idx = [G[0] for G in Gn] Gn = [G[1] for G in Gn] if len(Gn) != len_gn: - print('\n %d graphs are removed as they don\'t contain edges.\n' % - (len_gn - len(Gn))) + if verbose: + print('\n %d graphs are removed as they don\'t contain edges.\n' % + (len_gn - len(Gn))) start_time = time.time() @@ -100,9 +104,12 @@ def spkernel(*args, chunksize = int(len(Gn) / n_jobs) + 1 else: chunksize = 100 - for i, g in tqdm( - pool.imap_unordered(getsp_partial, itr, chunksize), - desc='getting sp graphs', file=sys.stdout): + if verbose: + iterator = tqdm(pool.imap_unordered(getsp_partial, itr, chunksize), + desc='getting sp graphs', file=sys.stdout) + else: + iterator = pool.imap_unordered(getsp_partial, itr, chunksize) + for i, g in iterator: Gn[i] = g pool.close() pool.join() @@ -186,9 +193,10 @@ def spkernel(*args, # Kmatrix[j][i] = kernel run_time = time.time() - start_time - print( - "\n --- shortest path kernel matrix of size %d built in %s seconds ---" - % (len(Gn), run_time)) + if verbose: + print( + "\n --- shortest path kernel matrix of size %d built in %s seconds ---" + % (len(Gn), run_time)) return Kmatrix, run_time, idx diff --git a/pygraph/kernels/structuralspKernel.py b/pygraph/kernels/structuralspKernel.py index 2f9d8a3..574b524 100644 --- a/pygraph/kernels/structuralspKernel.py +++ b/pygraph/kernels/structuralspKernel.py @@ -32,7 +32,8 @@ def structuralspkernel(*args, node_kernels=None, edge_kernels=None, compute_method='naive', - n_jobs=None): + n_jobs=None, + verbose=True): """Calculate mean average structural shortest path kernels between graphs. Parameters @@ -75,7 +76,8 @@ def structuralspkernel(*args, Gn = args[0] if len(args) == 1 else [args[0], args[1]] weight = None if edge_weight is None: - print('\n None edge weight specified. Set all weight to 1.\n') + if verbose: + print('\n None edge weight specified. Set all weight to 1.\n') else: try: some_weight = list( @@ -83,13 +85,15 @@ def structuralspkernel(*args, if isinstance(some_weight, (float, int)): weight = edge_weight else: - print( - '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' - % edge_weight) + if verbose: + print( + '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' + % edge_weight) except: - print( - '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' - % edge_weight) + if verbose: + print( + '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' + % edge_weight) ds_attrs = get_dataset_attributes( Gn, attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', @@ -110,11 +114,13 @@ def structuralspkernel(*args, if compute_method == 'trie': getsp_partial = partial(wrapper_getSP_trie, weight, ds_attrs['is_directed']) else: - getsp_partial = partial(wrapper_getSP_naive, weight, ds_attrs['is_directed']) - for i, sp in tqdm( - pool.imap_unordered(getsp_partial, itr, chunksize), - desc='getting shortest paths', - file=sys.stdout): + getsp_partial = partial(wrapper_getSP_naive, weight, ds_attrs['is_directed']) + if verbose: + iterator = tqdm(pool.imap_unordered(getsp_partial, itr, chunksize), + desc='getting shortest paths', file=sys.stdout) + else: + iterator = pool.imap_unordered(getsp_partial, itr, chunksize) + for i, sp in iterator: splist[i] = sp # time.sleep(10) pool.close() @@ -169,12 +175,12 @@ def structuralspkernel(*args, do_partial = partial(wrapper_ssp_do_trie, ds_attrs, node_label, edge_label, node_kernels, edge_kernels) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(splist, Gn), n_jobs=n_jobs) + glbv=(splist, Gn), n_jobs=n_jobs, verbose=verbose) else: do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, node_kernels, edge_kernels) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(splist, Gn), n_jobs=n_jobs) + glbv=(splist, Gn), n_jobs=n_jobs, verbose=verbose) # # ---- use pool.map to parallel. ---- # pool = Pool(n_jobs) @@ -233,9 +239,9 @@ def structuralspkernel(*args, # Kmatrix[j][i] = kernel run_time = time.time() - start_time - print( - "\n --- shortest path kernel matrix of size %d built in %s seconds ---" - % (len(Gn), run_time)) + if verbose: + print("\n --- shortest path kernel matrix of size %d built in %s seconds ---" + % (len(Gn), run_time)) return Kmatrix, run_time diff --git a/pygraph/kernels/untilHPathKernel.py b/pygraph/kernels/untilHPathKernel.py index a82378f..5731476 100644 --- a/pygraph/kernels/untilHPathKernel.py +++ b/pygraph/kernels/untilHPathKernel.py @@ -28,7 +28,8 @@ def untilhpathkernel(*args, depth=10, k_func='tanimoto', compute_method='trie', - n_jobs=None): + n_jobs=None, + verbose=True): """Calculate path graph kernels up to depth/hight h between graphs. Parameters ---------- @@ -88,9 +89,12 @@ def untilhpathkernel(*args, else: getps_partial = partial(wrapper_find_all_paths_until_length, depth, ds_attrs, node_label, edge_label) - for i, ps in tqdm( - pool.imap_unordered(getps_partial, itr, chunksize), - desc='getting paths', file=sys.stdout): + if verbose: + iterator = tqdm(pool.imap_unordered(getps_partial, itr, chunksize), + desc='getting paths', file=sys.stdout) + else: + iterator = pool.imap_unordered(getps_partial, itr, chunksize) + for i, ps in iterator: all_paths[i] = ps pool.close() pool.join() @@ -122,14 +126,14 @@ def untilhpathkernel(*args, G_trie = trie_toshare do_partial = partial(wrapper_uhpath_do_trie, k_func) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(all_paths,), n_jobs=n_jobs) + glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) else: def init_worker(plist_toshare): global G_plist G_plist = plist_toshare do_partial = partial(wrapper_uhpath_do_naive, k_func) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, - glbv=(all_paths,), n_jobs=n_jobs) + glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) # # ---- direct running, normally use single CPU core. ---- @@ -167,9 +171,9 @@ def untilhpathkernel(*args, # pbar.update(1) run_time = time.time() - start_time - print( - "\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" - % (depth, len(Gn), run_time)) + if verbose: + print("\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" + % (depth, len(Gn), run_time)) # print(Kmatrix[0][0:10]) return Kmatrix, run_time diff --git a/pygraph/utils/model_selection_precomputed.py b/pygraph/utils/model_selection_precomputed.py index 06e72f6..174b674 100644 --- a/pygraph/utils/model_selection_precomputed.py +++ b/pygraph/utils/model_selection_precomputed.py @@ -32,7 +32,8 @@ def model_selection_for_precomputed_kernel(datafile, extra_params=None, ds_name='ds-unknown', n_jobs=1, - read_gm_from_file=False): + read_gm_from_file=False, + verbose=True): """Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results. Parameters @@ -84,15 +85,17 @@ def model_selection_for_precomputed_kernel(datafile, raise Exception( 'The model type is incorrect! Please choose from regression or classification.' ) - print() - print('--- This is a %s problem ---' % model_type) + if verbose: + print() + print('--- This is a %s problem ---' % model_type) str_fw += 'This is a %s problem.\n' % model_type # calculate gram matrices rather than read them from file. if read_gm_from_file == False: # Load the dataset - print() - print('\n1. Loading dataset from file...') + if verbose: + print() + print('\n1. Loading dataset from file...') if isinstance(datafile, str): dataset, y_all = loadDataset( datafile, filename_y=datafile_y, extra_params=extra_params) @@ -117,14 +120,16 @@ def model_selection_for_precomputed_kernel(datafile, ] # list to store param grids precomputed ignoring the useless ones # calculate all gram matrices - print() - print('2. Calculating gram matrices. This could take a while...') + if verbose: + print() + print('2. Calculating gram matrices. This could take a while...') str_fw += '\nII. Gram matrices.\n\n' tts = time.time() # start training time nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN) for idx, params_out in enumerate(param_list_precomputed): y = y_all[:] params_out['n_jobs'] = n_jobs + params_out['verbose'] = verbose # print(dataset) # import networkx as nx # nx.draw_networkx(dataset[1]) @@ -154,23 +159,27 @@ def model_selection_for_precomputed_kernel(datafile, for j in range(i, len(Kmatrix)): Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) Kmatrix[j][i] = Kmatrix[i][j] - - print() + if verbose: + print() if params_out == {}: - print('the gram matrix is: ') + if verbose: + print('the gram matrix is: ') str_fw += 'the gram matrix is:\n\n' else: - print('the gram matrix with parameters', params_out, 'is: \n\n') + if verbose: + print('the gram matrix with parameters', params_out, 'is: \n\n') str_fw += 'the gram matrix with parameters %s is:\n\n' % params_out if len(Kmatrix) < 2: nb_gm_ignore += 1 - print('ignored, as at most only one of all its diagonal value is non-zero.') + if verbose: + print('ignored, as at most only one of all its diagonal value is non-zero.') str_fw += 'ignored, as at most only one of all its diagonal value is non-zero.\n\n' else: if np.isnan(Kmatrix).any( ): # if the matrix contains elements that are not numbers nb_gm_ignore += 1 - print('ignored, as it contains elements that are not numbers.') + if verbose: + print('ignored, as it contains elements that are not numbers.') str_fw += 'ignored, as it contains elements that are not numbers.\n\n' else: # print(Kmatrix) @@ -193,10 +202,12 @@ def model_selection_for_precomputed_kernel(datafile, gram_matrix_time.append(current_run_time) param_list_pre_revised.append(params_out) if nb_g_ignore > 0: - print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore) + if verbose: + print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore) str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore - print() - print( + if verbose: + print() + print( '{} gram matrices are calculated, {} of which are ignored.'.format( len(param_list_precomputed), nb_gm_ignore)) str_fw += '{} gram matrices are calculated, {} of which are ignored.\n\n'.format(len(param_list_precomputed), nb_gm_ignore) @@ -205,20 +216,22 @@ def model_selection_for_precomputed_kernel(datafile, '{}: {}\n'.format(idx, params_out) for idx, params_out in enumerate(param_list_precomputed) ]) - - print() + + if verbose: + print() if len(gram_matrices) == 0: - print('all gram matrices are ignored, no results obtained.') + if verbose: + print('all gram matrices are ignored, no results obtained.') str_fw += '\nall gram matrices are ignored, no results obtained.\n\n' else: # save gram matrices to file. np.savez(results_dir + '/' + ds_name + '.gm', gms=gram_matrices, params=param_list_pre_revised, y=y, gmtime=gram_matrix_time) - - print( + if verbose: + print( '3. Fitting and predicting using nested cross validation. This could really take a while...' - ) + ) # ---- use pool.imap_unordered to parallel and track progress. ---- # train_pref = [] @@ -252,7 +265,12 @@ def model_selection_for_precomputed_kernel(datafile, # else: # chunksize = 1000 chunksize = 1 - for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout): + if verbose: + iterator = tqdm(pool.imap_unordered(trial_do_partial, + range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout) + else: + iterator = pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize) + for o1, o2, o3 in iterator: train_pref.append(o1) val_pref.append(o2) test_pref.append(o3) @@ -278,8 +296,9 @@ def model_selection_for_precomputed_kernel(datafile, # test_pref.append(o3) # print() - print() - print('4. Getting final performance...') + if verbose: + print() + print('4. Getting final performance...') str_fw += '\nIII. Performance.\n\n' # averages and confidences of performances on outer trials for each combination of parameters average_train_scores = np.mean(train_pref, axis=0) @@ -311,11 +330,12 @@ def model_selection_for_precomputed_kernel(datafile, param_list_pre_revised[i] for i in best_params_index[0] ] best_params_in = [param_list[i] for i in best_params_index[1]] - print('best_params_out: ', best_params_out) - print('best_params_in: ', best_params_in) - print() - print('best_val_perf: ', best_val_perf) - print('best_val_std: ', min_val_std) + if verbose: + print('best_params_out: ', best_params_out) + print('best_params_in: ', best_params_in) + print() + print('best_val_perf: ', best_val_perf) + print('best_val_std: ', min_val_std) str_fw += 'best settings of hyper-params to build gram matrix: %s\n' % best_params_out str_fw += 'best settings of other hyper-params: %s\n\n' % best_params_in str_fw += 'best_val_perf: %s\n' % best_val_perf @@ -332,8 +352,9 @@ def model_selection_for_precomputed_kernel(datafile, std_perf_scores[value][best_params_index[1][idx]] for idx, value in enumerate(best_params_index[0]) ] - print('final_performance: ', final_performance) - print('final_confidence: ', final_confidence) + if verbose: + print('final_performance: ', final_performance) + print('final_confidence: ', final_confidence) str_fw += 'final_performance: %s\n' % final_performance str_fw += 'final_confidence: %s\n' % final_confidence train_performance = [ @@ -344,28 +365,29 @@ def model_selection_for_precomputed_kernel(datafile, std_train_scores[value][best_params_index[1][idx]] for idx, value in enumerate(best_params_index[0]) ] - print('train_performance: %s' % train_performance) - print('train_std: ', train_std) + if verbose: + print('train_performance: %s' % train_performance) + print('train_std: ', train_std) str_fw += 'train_performance: %s\n' % train_performance str_fw += 'train_std: %s\n\n' % train_std - - print() + + if verbose: + print() tt_total = time.time() - tts # training time for all hyper-parameters average_gram_matrix_time = np.mean(gram_matrix_time) - std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) + std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) if len(gram_matrix_time) > 1 else 0 best_gram_matrix_time = [ gram_matrix_time[i] for i in best_params_index[0] ] ave_bgmt = np.mean(best_gram_matrix_time) - std_bgmt = np.std(best_gram_matrix_time, ddof=1) - print( - 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s' - .format(average_gram_matrix_time, std_gram_matrix_time)) - print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format( - ave_bgmt, std_bgmt)) - print( - 'total training time with all hyper-param choices: {:.2f}s'.format( - tt_total)) + std_bgmt = np.std(best_gram_matrix_time, ddof=1) if len(best_gram_matrix_time) > 1 else 0 + if verbose: + print('time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s' + .format(average_gram_matrix_time, std_gram_matrix_time)) + print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format( + ave_bgmt, std_bgmt)) + print('total training time with all hyper-param choices: {:.2f}s'.format( + tt_total)) str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\n'.format(average_gram_matrix_time, std_gram_matrix_time) str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt) str_fw += 'total training time with all hyper-param choices: {:.2f}s\n\n'.format(tt_total) @@ -437,7 +459,8 @@ def model_selection_for_precomputed_kernel(datafile, 'params', 'train_perf', 'valid_perf', 'test_perf', 'gram_matrix_time' ] - print() + if verbose: + print() tb_print = tabulate( OrderedDict( sorted(table_dict.items(), @@ -453,8 +476,9 @@ def model_selection_for_precomputed_kernel(datafile, param_list = list(ParameterGrid(param_grid)) # read gram matrices from file. - print() - print('2. Reading gram matrices from file...') + if verbose: + print() + print('2. Reading gram matrices from file...') str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n' gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed @@ -464,9 +488,10 @@ def model_selection_for_precomputed_kernel(datafile, tts = time.time() # start training time # nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN) - print( - '3. Fitting and predicting using nested cross validation. This could really take a while...' - ) + if verbose: + print( + '3. Fitting and predicting using nested cross validation. This could really take a while...' + ) # ---- use pool.imap_unordered to parallel and track progress. ---- def init_worker(gms_toshare): @@ -479,7 +504,12 @@ def model_selection_for_precomputed_kernel(datafile, val_pref = [] test_pref = [] chunksize = 1 - for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout): + if verbose: + iterator = tqdm(pool.imap_unordered(trial_do_partial, + range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout) + else: + iterator = pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize) + for o1, o2, o3 in iterator: train_pref.append(o1) val_pref.append(o2) test_pref.append(o3) @@ -509,8 +539,9 @@ def model_selection_for_precomputed_kernel(datafile, # val_pref.append(o2) # test_pref.append(o3) - print() - print('4. Getting final performance...') + if verbose: + print() + print('4. Getting final performance...') str_fw += '\nIII. Performance.\n\n' # averages and confidences of performances on outer trials for each combination of parameters average_train_scores = np.mean(train_pref, axis=0) @@ -537,11 +568,12 @@ def model_selection_for_precomputed_kernel(datafile, param_list_pre_revised[i] for i in best_params_index[0] ] best_params_in = [param_list[i] for i in best_params_index[1]] - print('best_params_out: ', best_params_out) - print('best_params_in: ', best_params_in) - print() - print('best_val_perf: ', best_val_perf) - print('best_val_std: ', min_val_std) + if verbose: + print('best_params_out: ', best_params_out) + print('best_params_in: ', best_params_in) + print() + print('best_val_perf: ', best_val_perf) + print('best_val_std: ', min_val_std) str_fw += 'best settings of hyper-params to build gram matrix: %s\n' % best_params_out str_fw += 'best settings of other hyper-params: %s\n\n' % best_params_in str_fw += 'best_val_perf: %s\n' % best_val_perf @@ -555,8 +587,9 @@ def model_selection_for_precomputed_kernel(datafile, std_perf_scores[value][best_params_index[1][idx]] for idx, value in enumerate(best_params_index[0]) ] - print('final_performance: ', final_performance) - print('final_confidence: ', final_confidence) + if verbose: + print('final_performance: ', final_performance) + print('final_confidence: ', final_confidence) str_fw += 'final_performance: %s\n' % final_performance str_fw += 'final_confidence: %s\n' % final_confidence train_performance = [ @@ -567,30 +600,34 @@ def model_selection_for_precomputed_kernel(datafile, std_train_scores[value][best_params_index[1][idx]] for idx, value in enumerate(best_params_index[0]) ] - print('train_performance: %s' % train_performance) - print('train_std: ', train_std) + if verbose: + print('train_performance: %s' % train_performance) + print('train_std: ', train_std) str_fw += 'train_performance: %s\n' % train_performance str_fw += 'train_std: %s\n\n' % train_std - print() + if verbose: + print() average_gram_matrix_time = np.mean(gram_matrix_time) - std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) + std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) if len(gram_matrix_time) > 1 else 0 best_gram_matrix_time = [ gram_matrix_time[i] for i in best_params_index[0] ] ave_bgmt = np.mean(best_gram_matrix_time) - std_bgmt = np.std(best_gram_matrix_time, ddof=1) - print( - 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s' - .format(average_gram_matrix_time, std_gram_matrix_time)) - print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format( - ave_bgmt, std_bgmt)) + std_bgmt = np.std(best_gram_matrix_time, ddof=1) if len(best_gram_matrix_time) > 1 else 0 + if verbose: + print( + 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s' + .format(average_gram_matrix_time, std_gram_matrix_time)) + print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format( + ave_bgmt, std_bgmt)) tt_poster = time.time() - tts # training time with hyper-param choices who did not participate in calculation of gram matrices - print( - 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s'.format( - tt_poster)) - print('total training time with all hyper-param choices: {:.2f}s'.format( - tt_poster + np.sum(gram_matrix_time))) + if verbose: + print( + 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s'.format( + tt_poster)) + print('total training time with all hyper-param choices: {:.2f}s'.format( + tt_poster + np.sum(gram_matrix_time))) # str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\n'.format(average_gram_matrix_time, std_gram_matrix_time) # str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt) str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster) @@ -633,7 +670,8 @@ def model_selection_for_precomputed_kernel(datafile, keyorder = [ 'params', 'train_perf', 'valid_perf', 'test_perf' ] - print() + if verbose: + print() tb_print = tabulate( OrderedDict( sorted(table_dict.items(),