@@ -1,62 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Oct 17 16:07:38 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.graphdataset import get_dataset_attributes | |||
# Datasets to inspect. Each entry gives a display name and the dataset file
# path; 'dataset_y' (separate targets file) and 'extra_params' (loader
# options, e.g. .mat column layout) are optional per dataset.
dslist = [
    {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds'},
    {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',
     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'},
    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'},
    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},
    {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
    {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},
    {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},
    {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},
    {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},
    {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},
    {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},
    {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},
    {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},
    {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},
    {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},
    {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},
    {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
     'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},
    {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
     'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},
    {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
     'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt'},

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]

# Load each dataset and print its computed attributes (node/edge labels,
# graph counts, etc.) as returned by get_dataset_attributes.
for ds in dslist:
    # dict.get returns None for missing optional keys, matching the loader's
    # defaults — idiomatic replacement for the `x if k in d else None` form.
    dataset, y = loadDataset(
        ds['dataset'],
        filename_y=ds.get('dataset_y'),
        extra_params=ds.get('extra_params'))
    attrs = get_dataset_attributes(
        dataset, target=y, node_label='atom', edge_label='bond_type')
    print()
    print(ds['name'] + ':')
    # Iterate key/value pairs directly instead of re-indexing the dict.
    for atr, value in attrs.items():
        print(atr, ':', value)
    print()
@@ -1,815 +0,0 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"Acyclic\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 183it [00:00, 2198.32it/s]\n", | |||
"calculating kernels: 16836it [00:17, 983.99it/s] \n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 183 built in 17.32457208633423 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f63ab934158>, 'nsymb': <function gaussiankernel at 0x7f63ab9987b8>, 'mix': functools.partial(<function kernelproduct at 0x7f63ab951158>, <function deltakernel at 0x7f63ab934158>, <function gaussiankernel at 0x7f63ab9987b8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:12, 2.48it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f63ab934158>, 'nsymb': <function gaussiankernel at 0x7f63ab9987b8>, 'mix': functools.partial(<function kernelproduct at 0x7f63ab951158>, <function deltakernel at 0x7f63ab934158>, <function gaussiankernel at 0x7f63ab9987b8>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 3.1622776601683795e-10}]\n", | |||
"\n", | |||
"best_val_perf: 9.64631220504699\n", | |||
"best_val_std: 0.6555235266552757\n", | |||
"final_performance: [9.306976995404987]\n", | |||
"final_confidence: [2.317244919360123]\n", | |||
"train_performance: [6.190191405968441]\n", | |||
"train_std: [0.21512408952827894]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 17.32±nans\n", | |||
"time to calculate best gram matrix: 17.32±nans\n", | |||
"total training time with all hyper-param choices: 33.16s\n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||
" keepdims=keepdims)\n", | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||
" ret = ret.dtype.type(ret / rcount)\n" | |||
] | |||
}, | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Filename: ../pygraph/utils/model_selection_precomputed.py\n", | |||
"\n", | |||
"Line # Mem usage Increment Line Contents\n", | |||
"================================================\n", | |||
" 24 115.1 MiB 115.1 MiB @profile\n", | |||
" 25 def model_selection_for_precomputed_kernel(datafile,\n", | |||
" 26 estimator,\n", | |||
" 27 param_grid_precomputed,\n", | |||
" 28 param_grid,\n", | |||
" 29 model_type,\n", | |||
" 30 NUM_TRIALS=30,\n", | |||
" 31 datafile_y=None,\n", | |||
" 32 extra_params=None,\n", | |||
" 33 ds_name='ds-unknown',\n", | |||
" 34 n_jobs=1,\n", | |||
" 35 read_gm_from_file=False):\n", | |||
" 36 \"\"\"Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results.\n", | |||
" 37 \n", | |||
" 38 Parameters\n", | |||
" 39 ----------\n", | |||
" 40 datafile : string\n", | |||
" 41 Path of dataset file.\n", | |||
" 42 estimator : function\n", | |||
" 43 kernel function used to estimate. This function needs to return a gram matrix.\n", | |||
" 44 param_grid_precomputed : dictionary\n", | |||
" 45 Dictionary with names (string) of parameters used to calculate gram matrices as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n", | |||
" 46 param_grid : dictionary\n", | |||
" 47 Dictionary with names (string) of parameters used as penelties as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n", | |||
" 48 model_type : string\n", | |||
" 49 Typr of the problem, can be regression or classification.\n", | |||
" 50 NUM_TRIALS : integer\n", | |||
" 51 Number of random trials of outer cv loop. The default is 30.\n", | |||
" 52 datafile_y : string\n", | |||
" 53 Path of file storing y data. This parameter is optional depending on the given dataset file.\n", | |||
" 54 read_gm_from_file : boolean\n", | |||
" 55 Whether gram matrices are loaded from file.\n", | |||
" 56 \n", | |||
" 57 Examples\n", | |||
" 58 --------\n", | |||
" 59 >>> import numpy as np\n", | |||
" 60 >>> import sys\n", | |||
" 61 >>> sys.path.insert(0, \"../\")\n", | |||
" 62 >>> from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel\n", | |||
" 63 >>> from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", | |||
" 64 >>>\n", | |||
" 65 >>> datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", | |||
" 66 >>> estimator = weisfeilerlehmankernel\n", | |||
" 67 >>> param_grid_precomputed = {'height': [0,1,2,3,4,5,6,7,8,9,10], 'base_kernel': ['subtree']}\n", | |||
" 68 >>> param_grid = {\"alpha\": np.logspace(-2, 2, num = 10, base = 10)}\n", | |||
" 69 >>>\n", | |||
" 70 >>> model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, 'regression')\n", | |||
" 71 \"\"\"\n", | |||
" 72 115.1 MiB 0.0 MiB tqdm.monitor_interval = 0\n", | |||
" 73 \n", | |||
" 74 115.1 MiB 0.0 MiB results_dir = '../notebooks/results/' + estimator.__name__\n", | |||
" 75 115.1 MiB 0.0 MiB if not os.path.exists(results_dir):\n", | |||
" 76 os.makedirs(results_dir)\n", | |||
" 77 # a string to save all the results.\n", | |||
" 78 115.1 MiB 0.0 MiB str_fw = '###################### log time: ' + datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\") + '. ######################\\n\\n'\n", | |||
" 79 115.1 MiB 0.0 MiB str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\\n# including gram matrices, serial numbers for gram matrix figures and performance.\\n\\n'\n", | |||
" 80 \n", | |||
" 81 # setup the model type\n", | |||
" 82 115.1 MiB 0.0 MiB model_type = model_type.lower()\n", | |||
" 83 115.1 MiB 0.0 MiB if model_type != 'regression' and model_type != 'classification':\n", | |||
" 84 raise Exception(\n", | |||
" 85 'The model type is incorrect! Please choose from regression or classification.'\n", | |||
" 86 )\n", | |||
" 87 115.1 MiB 0.0 MiB print()\n", | |||
" 88 115.1 MiB 0.0 MiB print('--- This is a %s problem ---' % model_type)\n", | |||
" 89 115.1 MiB 0.0 MiB str_fw += 'This is a %s problem.\\n' % model_type\n", | |||
" 90 \n", | |||
" 91 # calculate gram matrices rather than read them from file.\n", | |||
" 92 115.1 MiB 0.0 MiB if read_gm_from_file == False:\n", | |||
" 93 # Load the dataset\n", | |||
" 94 115.1 MiB 0.0 MiB print()\n", | |||
" 95 115.1 MiB 0.0 MiB print('\\n1. Loading dataset from file...')\n", | |||
" 96 115.1 MiB 0.0 MiB if isinstance(datafile, str):\n", | |||
" 97 115.1 MiB 0.0 MiB dataset, y_all = loadDataset(\n", | |||
" 98 116.3 MiB 1.2 MiB datafile, filename_y=datafile_y, extra_params=extra_params)\n", | |||
" 99 else: # load data directly from variable.\n", | |||
" 100 dataset = datafile\n", | |||
" 101 y_all = datafile_y \n", | |||
" 102 \n", | |||
" 103 # import matplotlib.pyplot as plt\n", | |||
" 104 # import networkx as nx\n", | |||
" 105 # nx.draw_networkx(dataset[30])\n", | |||
" 106 # plt.show()\n", | |||
" 107 \n", | |||
" 108 # Grid of parameters with a discrete number of values for each.\n", | |||
" 109 116.3 MiB 0.0 MiB param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", | |||
" 110 116.3 MiB 0.0 MiB param_list = list(ParameterGrid(param_grid))\n", | |||
" 111 \n", | |||
" 112 116.3 MiB 0.0 MiB gram_matrices = [\n", | |||
" 113 ] # a list to store gram matrices for all param_grid_precomputed\n", | |||
" 114 116.3 MiB 0.0 MiB gram_matrix_time = [\n", | |||
" 115 ] # a list to store time to calculate gram matrices\n", | |||
" 116 116.3 MiB 0.0 MiB param_list_pre_revised = [\n", | |||
" 117 ] # list to store param grids precomputed ignoring the useless ones\n", | |||
" 118 \n", | |||
" 119 # calculate all gram matrices\n", | |||
" 120 116.3 MiB 0.0 MiB print()\n", | |||
" 121 116.3 MiB 0.0 MiB print('2. Calculating gram matrices. This could take a while...')\n", | |||
" 122 116.3 MiB 0.0 MiB str_fw += '\\nII. Gram matrices.\\n\\n'\n", | |||
" 123 116.3 MiB 0.0 MiB tts = time.time() # start training time\n", | |||
" 124 116.3 MiB 0.0 MiB nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN)\n", | |||
" 125 144.8 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed):\n", | |||
" 126 116.3 MiB 0.0 MiB y = y_all[:]\n", | |||
" 127 116.3 MiB 0.0 MiB params_out['n_jobs'] = n_jobs\n", | |||
" 128 # print(dataset)\n", | |||
" 129 # import networkx as nx\n", | |||
" 130 # nx.draw_networkx(dataset[1])\n", | |||
" 131 # plt.show()\n", | |||
" 132 119.1 MiB 2.8 MiB rtn_data = estimator(dataset[:], **params_out)\n", | |||
" 133 119.1 MiB 0.0 MiB Kmatrix = rtn_data[0]\n", | |||
" 134 119.1 MiB 0.0 MiB current_run_time = rtn_data[1]\n", | |||
" 135 # for some kernels, some graphs in datasets may not meet the \n", | |||
" 136 # kernels' requirements for graph structure. These graphs are trimmed. \n", | |||
" 137 119.1 MiB 0.0 MiB if len(rtn_data) == 3:\n", | |||
" 138 119.1 MiB 0.0 MiB idx_trim = rtn_data[2] # the index of trimmed graph list\n", | |||
" 139 119.1 MiB 0.0 MiB y = [y[idxt] for idxt in idx_trim] # trim y accordingly\n", | |||
" 140 # Kmatrix = np.random.rand(2250, 2250)\n", | |||
" 141 # current_run_time = 0.1\n", | |||
" 142 \n", | |||
" 143 119.1 MiB 0.0 MiB Kmatrix_diag = Kmatrix.diagonal().copy()\n", | |||
" 144 # remove graphs whose kernels with themselves are zeros\n", | |||
" 145 119.1 MiB 0.0 MiB nb_g_ignore = 0\n", | |||
" 146 119.1 MiB 0.0 MiB for idxk, diag in enumerate(Kmatrix_diag):\n", | |||
" 147 119.1 MiB 0.0 MiB if diag == 0:\n", | |||
" 148 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=0)\n", | |||
" 149 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=1)\n", | |||
" 150 nb_g_ignore += 1\n", | |||
" 151 # normalization\n", | |||
" 152 119.1 MiB 0.0 MiB for i in range(len(Kmatrix)):\n", | |||
" 153 119.1 MiB 0.0 MiB for j in range(i, len(Kmatrix)):\n", | |||
" 154 119.1 MiB 0.0 MiB Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])\n", | |||
" 155 119.1 MiB 0.0 MiB Kmatrix[j][i] = Kmatrix[i][j]\n", | |||
" 156 \n", | |||
" 157 119.1 MiB 0.0 MiB print()\n", | |||
" 158 119.1 MiB 0.0 MiB if params_out == {}:\n", | |||
" 159 print('the gram matrix is: ')\n", | |||
" 160 str_fw += 'the gram matrix is:\\n\\n'\n", | |||
" 161 else:\n", | |||
" 162 119.1 MiB 0.0 MiB print('the gram matrix with parameters', params_out, 'is: ')\n", | |||
" 163 119.1 MiB 0.0 MiB str_fw += 'the gram matrix with parameters %s is:\\n\\n' % params_out\n", | |||
" 164 119.1 MiB 0.0 MiB if len(Kmatrix) < 2:\n", | |||
" 165 nb_gm_ignore += 1\n", | |||
" 166 print('ignored, as at most only one of all its diagonal value is non-zero.')\n", | |||
" 167 str_fw += 'ignored, as at most only one of all its diagonal value is non-zero.\\n\\n'\n", | |||
" 168 else: \n", | |||
" 169 119.1 MiB 0.0 MiB if np.isnan(Kmatrix).any(\n", | |||
" 170 ): # if the matrix contains elements that are not numbers\n", | |||
" 171 nb_gm_ignore += 1\n", | |||
" 172 print('ignored, as it contains elements that are not numbers.')\n", | |||
" 173 str_fw += 'ignored, as it contains elements that are not numbers.\\n\\n'\n", | |||
" 174 else:\n", | |||
" 175 # print(Kmatrix)\n", | |||
" 176 119.1 MiB 0.0 MiB str_fw += np.array2string(\n", | |||
" 177 119.1 MiB 0.0 MiB Kmatrix,\n", | |||
" 178 119.1 MiB 0.0 MiB separator=',') + '\\n\\n'\n", | |||
" 179 # separator=',',\n", | |||
" 180 # threshold=np.inf,\n", | |||
" 181 # floatmode='unique') + '\\n\\n'\n", | |||
" 182 \n", | |||
" 183 119.1 MiB 0.0 MiB fig_file_name = results_dir + '/GM[ds]' + ds_name\n", | |||
" 184 119.1 MiB 0.0 MiB if params_out != {}:\n", | |||
" 185 119.1 MiB 0.0 MiB fig_file_name += '[params]' + str(idx)\n", | |||
" 186 119.8 MiB 0.7 MiB plt.imshow(Kmatrix)\n", | |||
" 187 119.9 MiB 0.1 MiB plt.colorbar()\n", | |||
" 188 144.8 MiB 24.9 MiB plt.savefig(fig_file_name + '.eps', format='eps', dpi=300)\n", | |||
" 189 # plt.show()\n", | |||
" 190 144.8 MiB 0.0 MiB plt.clf()\n", | |||
" 191 144.8 MiB 0.0 MiB gram_matrices.append(Kmatrix)\n", | |||
" 192 144.8 MiB 0.0 MiB gram_matrix_time.append(current_run_time)\n", | |||
" 193 144.8 MiB 0.0 MiB param_list_pre_revised.append(params_out)\n", | |||
" 194 144.8 MiB 0.0 MiB if nb_g_ignore > 0:\n", | |||
" 195 print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore)\n", | |||
" 196 str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore\n", | |||
" 197 144.8 MiB 0.0 MiB print()\n", | |||
" 198 144.8 MiB 0.0 MiB print(\n", | |||
" 199 144.8 MiB 0.0 MiB '{} gram matrices are calculated, {} of which are ignored.'.format(\n", | |||
" 200 144.8 MiB 0.0 MiB len(param_list_precomputed), nb_gm_ignore))\n", | |||
" 201 144.8 MiB 0.0 MiB str_fw += '{} gram matrices are calculated, {} of which are ignored.\\n\\n'.format(len(param_list_precomputed), nb_gm_ignore)\n", | |||
" 202 144.8 MiB 0.0 MiB str_fw += 'serial numbers of gram matrix figures and their corresponding parameters settings:\\n\\n'\n", | |||
" 203 144.8 MiB 0.0 MiB str_fw += ''.join([\n", | |||
" 204 144.8 MiB 0.0 MiB '{}: {}\\n'.format(idx, params_out)\n", | |||
" 205 144.8 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed)\n", | |||
" 206 ])\n", | |||
" 207 \n", | |||
" 208 144.8 MiB 0.0 MiB print()\n", | |||
" 209 144.8 MiB 0.0 MiB if len(gram_matrices) == 0:\n", | |||
" 210 print('all gram matrices are ignored, no results obtained.')\n", | |||
" 211 str_fw += '\\nall gram matrices are ignored, no results obtained.\\n\\n'\n", | |||
" 212 else:\n", | |||
" 213 # save gram matrices to file.\n", | |||
" 214 144.8 MiB 0.0 MiB np.savez(results_dir + '/' + ds_name + '.gm', \n", | |||
" 215 144.8 MiB 0.0 MiB gms=gram_matrices, params=param_list_pre_revised, y=y, \n", | |||
" 216 144.9 MiB 0.1 MiB gmtime=gram_matrix_time)\n", | |||
" 217 \n", | |||
" 218 144.9 MiB 0.0 MiB print(\n", | |||
" 219 144.9 MiB 0.0 MiB '3. Fitting and predicting using nested cross validation. This could really take a while...'\n", | |||
" 220 )\n", | |||
" 221 \n", | |||
" 222 # ---- use pool.imap_unordered to parallel and track progress. ----\n", | |||
" 223 # train_pref = []\n", | |||
" 224 # val_pref = []\n", | |||
" 225 # test_pref = []\n", | |||
" 226 # def func_assign(result, var_to_assign):\n", | |||
" 227 # for idx, itm in enumerate(var_to_assign):\n", | |||
" 228 # itm.append(result[idx]) \n", | |||
" 229 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 230 # \n", | |||
" 231 # parallel_me(trial_do_partial, range(NUM_TRIALS), func_assign, \n", | |||
" 232 # [train_pref, val_pref, test_pref], glbv=gram_matrices,\n", | |||
" 233 # method='imap_unordered', n_jobs=n_jobs, chunksize=1,\n", | |||
" 234 # itr_desc='cross validation')\n", | |||
" 235 \n", | |||
" 236 144.9 MiB 0.0 MiB def init_worker(gms_toshare):\n", | |||
" 237 global G_gms\n", | |||
" 238 G_gms = gms_toshare\n", | |||
" 239 \n", | |||
" 240 # gram_matrices = np.array(gram_matrices)\n", | |||
" 241 # gms_shape = gram_matrices.shape\n", | |||
" 242 # gms_array = Array('d', np.reshape(gram_matrices.copy(), -1, order='C'))\n", | |||
" 243 # pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gms_array, gms_shape))\n", | |||
" 244 144.9 MiB 0.1 MiB pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n", | |||
" 245 144.9 MiB 0.0 MiB trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 246 144.9 MiB 0.0 MiB train_pref = []\n", | |||
" 247 144.9 MiB 0.0 MiB val_pref = []\n", | |||
" 248 144.9 MiB 0.0 MiB test_pref = []\n", | |||
" 249 # if NUM_TRIALS < 1000 * n_jobs:\n", | |||
" 250 # chunksize = int(NUM_TRIALS / n_jobs) + 1\n", | |||
" 251 # else:\n", | |||
" 252 # chunksize = 1000\n", | |||
" 253 144.9 MiB 0.0 MiB chunksize = 1\n", | |||
" 254 145.1 MiB 0.1 MiB for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n", | |||
" 255 145.1 MiB 0.0 MiB train_pref.append(o1)\n", | |||
" 256 145.1 MiB 0.0 MiB val_pref.append(o2)\n", | |||
" 257 145.1 MiB 0.0 MiB test_pref.append(o3)\n", | |||
" 258 145.1 MiB 0.0 MiB pool.close()\n", | |||
" 259 145.1 MiB 0.0 MiB pool.join()\n", | |||
" 260 \n", | |||
" 261 # # ---- use pool.map to parallel. ----\n", | |||
" 262 # pool = Pool(n_jobs)\n", | |||
" 263 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y[0:250], model_type)\n", | |||
" 264 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n", | |||
" 265 # train_pref = [item[0] for item in result_perf]\n", | |||
" 266 # val_pref = [item[1] for item in result_perf]\n", | |||
" 267 # test_pref = [item[2] for item in result_perf]\n", | |||
" 268 \n", | |||
" 269 # # ---- direct running, normally use a single CPU core. ----\n", | |||
" 270 # train_pref = []\n", | |||
" 271 # val_pref = []\n", | |||
" 272 # test_pref = []\n", | |||
" 273 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n", | |||
" 274 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n", | |||
" 275 # train_pref.append(o1)\n", | |||
" 276 # val_pref.append(o2)\n", | |||
" 277 # test_pref.append(o3)\n", | |||
" 278 # print()\n", | |||
" 279 \n", | |||
" 280 145.1 MiB 0.0 MiB print()\n", | |||
" 281 145.1 MiB 0.0 MiB print('4. Getting final performance...')\n", | |||
" 282 145.1 MiB 0.0 MiB str_fw += '\\nIII. Performance.\\n\\n'\n", | |||
" 283 # averages and confidences of performances on outer trials for each combination of parameters\n", | |||
" 284 145.1 MiB 0.0 MiB average_train_scores = np.mean(train_pref, axis=0)\n", | |||
" 285 # print('val_pref: ', val_pref[0][0])\n", | |||
" 286 145.1 MiB 0.0 MiB average_val_scores = np.mean(val_pref, axis=0)\n", | |||
" 287 # print('test_pref: ', test_pref[0][0])\n", | |||
" 288 145.1 MiB 0.0 MiB average_perf_scores = np.mean(test_pref, axis=0)\n", | |||
" 289 # sample std is used here\n", | |||
" 290 145.1 MiB 0.0 MiB std_train_scores = np.std(train_pref, axis=0, ddof=1)\n", | |||
" 291 145.1 MiB 0.0 MiB std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", | |||
" 292 145.1 MiB 0.0 MiB std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", | |||
" 293 \n", | |||
" 294 145.1 MiB 0.0 MiB if model_type == 'regression':\n", | |||
" 295 145.1 MiB 0.0 MiB best_val_perf = np.amin(average_val_scores)\n", | |||
" 296 else:\n", | |||
" 297 best_val_perf = np.amax(average_val_scores)\n", | |||
" 298 # print('average_val_scores: ', average_val_scores)\n", | |||
" 299 # print('best_val_perf: ', best_val_perf)\n", | |||
" 300 # print()\n", | |||
" 301 145.1 MiB 0.0 MiB best_params_index = np.where(average_val_scores == best_val_perf)\n", | |||
" 302 # find smallest val std with best val perf.\n", | |||
" 303 best_val_stds = [\n", | |||
" 304 145.1 MiB 0.0 MiB std_val_scores[value][best_params_index[1][idx]]\n", | |||
" 305 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 306 ]\n", | |||
" 307 145.1 MiB 0.0 MiB min_val_std = np.amin(best_val_stds)\n", | |||
" 308 145.1 MiB 0.0 MiB best_params_index = np.where(std_val_scores == min_val_std)\n", | |||
" 309 best_params_out = [\n", | |||
" 310 145.1 MiB 0.0 MiB param_list_pre_revised[i] for i in best_params_index[0]\n", | |||
" 311 ]\n", | |||
" 312 145.1 MiB 0.0 MiB best_params_in = [param_list[i] for i in best_params_index[1]]\n", | |||
" 313 145.1 MiB 0.0 MiB print('best_params_out: ', best_params_out)\n", | |||
" 314 145.1 MiB 0.0 MiB print('best_params_in: ', best_params_in)\n", | |||
" 315 145.1 MiB 0.0 MiB print()\n", | |||
" 316 145.1 MiB 0.0 MiB print('best_val_perf: ', best_val_perf)\n", | |||
" 317 145.1 MiB 0.0 MiB print('best_val_std: ', min_val_std)\n", | |||
" 318 145.1 MiB 0.0 MiB str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n", | |||
" 319 145.1 MiB 0.0 MiB str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n", | |||
" 320 145.1 MiB 0.0 MiB str_fw += 'best_val_perf: %s\\n' % best_val_perf\n", | |||
" 321 145.1 MiB 0.0 MiB str_fw += 'best_val_std: %s\\n' % min_val_std\n", | |||
" 322 \n", | |||
" 323 # print(best_params_index)\n", | |||
" 324 # print(best_params_index[0])\n", | |||
" 325 # print(average_perf_scores)\n", | |||
" 326 final_performance = [\n", | |||
" 327 145.1 MiB 0.0 MiB average_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 328 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 329 ]\n", | |||
" 330 final_confidence = [\n", | |||
" 331 145.1 MiB 0.0 MiB std_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 332 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 333 ]\n", | |||
" 334 145.1 MiB 0.0 MiB print('final_performance: ', final_performance)\n", | |||
" 335 145.1 MiB 0.0 MiB print('final_confidence: ', final_confidence)\n", | |||
" 336 145.1 MiB 0.0 MiB str_fw += 'final_performance: %s\\n' % final_performance\n", | |||
" 337 145.1 MiB 0.0 MiB str_fw += 'final_confidence: %s\\n' % final_confidence\n", | |||
" 338 train_performance = [\n", | |||
" 339 145.1 MiB 0.0 MiB average_train_scores[value][best_params_index[1][idx]]\n", | |||
" 340 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 341 ]\n", | |||
" 342 train_std = [\n", | |||
" 343 145.1 MiB 0.0 MiB std_train_scores[value][best_params_index[1][idx]]\n", | |||
" 344 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 345 ]\n", | |||
" 346 145.1 MiB 0.0 MiB print('train_performance: %s' % train_performance)\n", | |||
" 347 145.1 MiB 0.0 MiB print('train_std: ', train_std)\n", | |||
" 348 145.1 MiB 0.0 MiB str_fw += 'train_performance: %s\\n' % train_performance\n", | |||
" 349 145.1 MiB 0.0 MiB str_fw += 'train_std: %s\\n\\n' % train_std\n", | |||
" 350 \n", | |||
" 351 145.1 MiB 0.0 MiB print()\n", | |||
" 352 145.1 MiB 0.0 MiB tt_total = time.time() - tts # training time for all hyper-parameters\n", | |||
" 353 145.1 MiB 0.0 MiB average_gram_matrix_time = np.mean(gram_matrix_time)\n", | |||
" 354 145.1 MiB 0.0 MiB std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n", | |||
" 355 best_gram_matrix_time = [\n", | |||
" 356 145.1 MiB 0.0 MiB gram_matrix_time[i] for i in best_params_index[0]\n", | |||
" 357 ]\n", | |||
" 358 145.1 MiB 0.0 MiB ave_bgmt = np.mean(best_gram_matrix_time)\n", | |||
" 359 145.1 MiB 0.0 MiB std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n", | |||
" 360 145.1 MiB 0.0 MiB print(\n", | |||
" 361 145.1 MiB 0.0 MiB 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n", | |||
" 362 145.1 MiB 0.0 MiB .format(average_gram_matrix_time, std_gram_matrix_time))\n", | |||
" 363 145.1 MiB 0.0 MiB print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n", | |||
" 364 145.1 MiB 0.0 MiB ave_bgmt, std_bgmt))\n", | |||
" 365 145.1 MiB 0.0 MiB print(\n", | |||
" 366 145.1 MiB 0.0 MiB 'total training time with all hyper-param choices: {:.2f}s'.format(\n", | |||
" 367 145.1 MiB 0.0 MiB tt_total))\n", | |||
" 368 145.1 MiB 0.0 MiB str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n", | |||
" 369 145.1 MiB 0.0 MiB str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n", | |||
" 370 145.1 MiB 0.0 MiB str_fw += 'total training time with all hyper-param choices: {:.2f}s\\n\\n'.format(tt_total)\n", | |||
" 371 \n", | |||
" 372 # # save results to file\n", | |||
" 373 # np.savetxt(results_name_pre + 'average_train_scores.dt',\n", | |||
" 374 # average_train_scores)\n", | |||
" 375 # np.savetxt(results_name_pre + 'average_val_scores', average_val_scores)\n", | |||
" 376 # np.savetxt(results_name_pre + 'average_perf_scores.dt',\n", | |||
" 377 # average_perf_scores)\n", | |||
" 378 # np.savetxt(results_name_pre + 'std_train_scores.dt', std_train_scores)\n", | |||
" 379 # np.savetxt(results_name_pre + 'std_val_scores.dt', std_val_scores)\n", | |||
" 380 # np.savetxt(results_name_pre + 'std_perf_scores.dt', std_perf_scores)\n", | |||
" 381 \n", | |||
" 382 # np.save(results_name_pre + 'best_params_index', best_params_index)\n", | |||
" 383 # np.save(results_name_pre + 'best_params_pre.dt', best_params_out)\n", | |||
" 384 # np.save(results_name_pre + 'best_params_in.dt', best_params_in)\n", | |||
" 385 # np.save(results_name_pre + 'best_val_perf.dt', best_val_perf)\n", | |||
" 386 # np.save(results_name_pre + 'best_val_std.dt', best_val_std)\n", | |||
" 387 # np.save(results_name_pre + 'final_performance.dt', final_performance)\n", | |||
" 388 # np.save(results_name_pre + 'final_confidence.dt', final_confidence)\n", | |||
" 389 # np.save(results_name_pre + 'train_performance.dt', train_performance)\n", | |||
" 390 # np.save(results_name_pre + 'train_std.dt', train_std)\n", | |||
" 391 \n", | |||
" 392 # np.save(results_name_pre + 'gram_matrix_time.dt', gram_matrix_time)\n", | |||
" 393 # np.save(results_name_pre + 'average_gram_matrix_time.dt',\n", | |||
" 394 # average_gram_matrix_time)\n", | |||
" 395 # np.save(results_name_pre + 'std_gram_matrix_time.dt',\n", | |||
" 396 # std_gram_matrix_time)\n", | |||
" 397 # np.save(results_name_pre + 'best_gram_matrix_time.dt',\n", | |||
" 398 # best_gram_matrix_time)\n", | |||
" 399 \n", | |||
" 400 # print out as table.\n", | |||
" 401 145.1 MiB 0.0 MiB from collections import OrderedDict\n", | |||
" 402 145.1 MiB 0.0 MiB from tabulate import tabulate\n", | |||
" 403 145.1 MiB 0.0 MiB table_dict = {}\n", | |||
" 404 145.1 MiB 0.0 MiB if model_type == 'regression':\n", | |||
" 405 145.1 MiB 0.0 MiB for param_in in param_list:\n", | |||
" 406 145.1 MiB 0.0 MiB param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n", | |||
" 407 else:\n", | |||
" 408 for param_in in param_list:\n", | |||
" 409 param_in['C'] = '{:.2e}'.format(param_in['C'])\n", | |||
" 410 145.1 MiB 0.0 MiB table_dict['params'] = [{**param_out, **param_in}\n", | |||
" 411 145.1 MiB 0.0 MiB for param_in in param_list for param_out in param_list_pre_revised]\n", | |||
" 412 table_dict['gram_matrix_time'] = [\n", | |||
" 413 145.1 MiB 0.0 MiB '{:.2f}'.format(gram_matrix_time[index_out])\n", | |||
" 414 145.1 MiB 0.0 MiB for param_in in param_list\n", | |||
" 415 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 416 ]\n", | |||
" 417 table_dict['valid_perf'] = [\n", | |||
" 418 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n", | |||
" 419 std_val_scores[index_out][index_in])\n", | |||
" 420 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 421 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 422 ]\n", | |||
" 423 table_dict['test_perf'] = [\n", | |||
" 424 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n", | |||
" 425 std_perf_scores[index_out][index_in])\n", | |||
" 426 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 427 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 428 ]\n", | |||
" 429 table_dict['train_perf'] = [\n", | |||
" 430 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n", | |||
" 431 std_train_scores[index_out][index_in])\n", | |||
" 432 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 433 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 434 ]\n", | |||
" 435 keyorder = [\n", | |||
" 436 145.1 MiB 0.0 MiB 'params', 'train_perf', 'valid_perf', 'test_perf',\n", | |||
" 437 145.1 MiB 0.0 MiB 'gram_matrix_time'\n", | |||
" 438 ]\n", | |||
" 439 145.1 MiB 0.0 MiB print()\n", | |||
" 440 145.1 MiB 0.0 MiB tb_print = tabulate(\n", | |||
" 441 145.1 MiB 0.0 MiB OrderedDict(\n", | |||
" 442 145.1 MiB 0.0 MiB sorted(table_dict.items(),\n", | |||
" 443 145.1 MiB 0.0 MiB key=lambda i: keyorder.index(i[0]))),\n", | |||
" 444 145.1 MiB 0.0 MiB headers='keys')\n", | |||
" 445 # print(tb_print)\n", | |||
" 446 145.1 MiB 0.0 MiB str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n", | |||
" 447 \n", | |||
" 448 # read gram matrices from file.\n", | |||
" 449 else: \n", | |||
" 450 # Grid of parameters with a discrete number of values for each.\n", | |||
" 451 # param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", | |||
" 452 param_list = list(ParameterGrid(param_grid))\n", | |||
" 453 \n", | |||
" 454 # read gram matrices from file.\n", | |||
" 455 print()\n", | |||
" 456 print('2. Reading gram matrices from file...')\n", | |||
" 457 str_fw += '\\nII. Gram matrices.\\n\\nGram matrices are read from file, see last log for detail.\\n'\n", | |||
" 458 gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')\n", | |||
" 459 gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed\n", | |||
" 460 gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices\n", | |||
" 461 param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones\n", | |||
" 462 y = gmfile['y'].tolist()\n", | |||
" 463 \n", | |||
" 464 tts = time.time() # start training time\n", | |||
" 465 # nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN) \n", | |||
" 466 print(\n", | |||
" 467 '3. Fitting and predicting using nested cross validation. This could really take a while...'\n", | |||
" 468 )\n", | |||
" 469 \n", | |||
" 470 # ---- use pool.imap_unordered to parallel and track progress. ----\n", | |||
" 471 def init_worker(gms_toshare):\n", | |||
" 472 global G_gms\n", | |||
" 473 G_gms = gms_toshare\n", | |||
" 474 \n", | |||
" 475 pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n", | |||
" 476 trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 477 train_pref = []\n", | |||
" 478 val_pref = []\n", | |||
" 479 test_pref = []\n", | |||
" 480 chunksize = 1\n", | |||
" 481 for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n", | |||
" 482 train_pref.append(o1)\n", | |||
" 483 val_pref.append(o2)\n", | |||
" 484 test_pref.append(o3)\n", | |||
" 485 pool.close()\n", | |||
" 486 pool.join()\n", | |||
" 487 \n", | |||
" 488 # # ---- use pool.map to parallel. ----\n", | |||
" 489 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n", | |||
" 490 # train_pref = [item[0] for item in result_perf]\n", | |||
" 491 # val_pref = [item[1] for item in result_perf]\n", | |||
" 492 # test_pref = [item[2] for item in result_perf]\n", | |||
" 493 \n", | |||
" 494 # # ---- use joblib.Parallel to parallel and track progress. ----\n", | |||
" 495 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)\n", | |||
" 496 # result_perf = Parallel(n_jobs=n_jobs, verbose=10)(delayed(trial_do_partial)(trial) for trial in range(NUM_TRIALS))\n", | |||
" 497 # train_pref = [item[0] for item in result_perf]\n", | |||
" 498 # val_pref = [item[1] for item in result_perf]\n", | |||
" 499 # test_pref = [item[2] for item in result_perf]\n", | |||
" 500 \n", | |||
" 501 # # ---- direct running, normally use a single CPU core. ----\n", | |||
" 502 # train_pref = []\n", | |||
" 503 # val_pref = []\n", | |||
" 504 # test_pref = []\n", | |||
" 505 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n", | |||
" 506 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n", | |||
" 507 # train_pref.append(o1)\n", | |||
" 508 # val_pref.append(o2)\n", | |||
" 509 # test_pref.append(o3)\n", | |||
" 510 \n", | |||
" 511 print()\n", | |||
" 512 print('4. Getting final performance...')\n", | |||
" 513 str_fw += '\\nIII. Performance.\\n\\n'\n", | |||
" 514 # averages and confidences of performances on outer trials for each combination of parameters\n", | |||
" 515 average_train_scores = np.mean(train_pref, axis=0)\n", | |||
" 516 average_val_scores = np.mean(val_pref, axis=0)\n", | |||
" 517 average_perf_scores = np.mean(test_pref, axis=0)\n", | |||
" 518 # sample std is used here\n", | |||
" 519 std_train_scores = np.std(train_pref, axis=0, ddof=1)\n", | |||
" 520 std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", | |||
" 521 std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", | |||
" 522 \n", | |||
" 523 if model_type == 'regression':\n", | |||
" 524 best_val_perf = np.amin(average_val_scores)\n", | |||
" 525 else:\n", | |||
" 526 best_val_perf = np.amax(average_val_scores)\n", | |||
" 527 best_params_index = np.where(average_val_scores == best_val_perf)\n", | |||
" 528 # find smallest val std with best val perf.\n", | |||
" 529 best_val_stds = [\n", | |||
" 530 std_val_scores[value][best_params_index[1][idx]]\n", | |||
" 531 for idx, value in enumerate(best_params_index[0])\n", | |||
" 532 ]\n", | |||
" 533 min_val_std = np.amin(best_val_stds)\n", | |||
" 534 best_params_index = np.where(std_val_scores == min_val_std)\n", | |||
" 535 best_params_out = [\n", | |||
" 536 param_list_pre_revised[i] for i in best_params_index[0]\n", | |||
" 537 ]\n", | |||
" 538 best_params_in = [param_list[i] for i in best_params_index[1]]\n", | |||
" 539 print('best_params_out: ', best_params_out)\n", | |||
" 540 print('best_params_in: ', best_params_in)\n", | |||
" 541 print()\n", | |||
" 542 print('best_val_perf: ', best_val_perf)\n", | |||
" 543 print('best_val_std: ', min_val_std)\n", | |||
" 544 str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n", | |||
" 545 str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n", | |||
" 546 str_fw += 'best_val_perf: %s\\n' % best_val_perf\n", | |||
" 547 str_fw += 'best_val_std: %s\\n' % min_val_std\n", | |||
" 548 \n", | |||
" 549 final_performance = [\n", | |||
" 550 average_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 551 for idx, value in enumerate(best_params_index[0])\n", | |||
" 552 ]\n", | |||
" 553 final_confidence = [\n", | |||
" 554 std_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 555 for idx, value in enumerate(best_params_index[0])\n", | |||
" 556 ]\n", | |||
" 557 print('final_performance: ', final_performance)\n", | |||
" 558 print('final_confidence: ', final_confidence)\n", | |||
" 559 str_fw += 'final_performance: %s\\n' % final_performance\n", | |||
" 560 str_fw += 'final_confidence: %s\\n' % final_confidence\n", | |||
" 561 train_performance = [\n", | |||
" 562 average_train_scores[value][best_params_index[1][idx]]\n", | |||
" 563 for idx, value in enumerate(best_params_index[0])\n", | |||
" 564 ]\n", | |||
" 565 train_std = [\n", | |||
" 566 std_train_scores[value][best_params_index[1][idx]]\n", | |||
" 567 for idx, value in enumerate(best_params_index[0])\n", | |||
" 568 ]\n", | |||
" 569 print('train_performance: %s' % train_performance)\n", | |||
" 570 print('train_std: ', train_std)\n", | |||
" 571 str_fw += 'train_performance: %s\\n' % train_performance\n", | |||
" 572 str_fw += 'train_std: %s\\n\\n' % train_std\n", | |||
" 573 \n", | |||
" 574 print()\n", | |||
" 575 average_gram_matrix_time = np.mean(gram_matrix_time)\n", | |||
" 576 std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n", | |||
" 577 best_gram_matrix_time = [\n", | |||
" 578 gram_matrix_time[i] for i in best_params_index[0]\n", | |||
" 579 ]\n", | |||
" 580 ave_bgmt = np.mean(best_gram_matrix_time)\n", | |||
" 581 std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n", | |||
" 582 print(\n", | |||
" 583 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n", | |||
" 584 .format(average_gram_matrix_time, std_gram_matrix_time))\n", | |||
" 585 print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n", | |||
" 586 ave_bgmt, std_bgmt))\n", | |||
" 587 tt_poster = time.time() - tts # training time with hyper-param choices who did not participate in calculation of gram matrices\n", | |||
" 588 print(\n", | |||
" 589 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s'.format(\n", | |||
" 590 tt_poster))\n", | |||
" 591 print('total training time with all hyper-param choices: {:.2f}s'.format(\n", | |||
" 592 tt_poster + np.sum(gram_matrix_time)))\n", | |||
" 593 # str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n", | |||
" 594 # str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n", | |||
" 595 str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\\n\\n'.format(tt_poster)\n", | |||
" 596 \n", | |||
" 597 # print out as table.\n", | |||
" 598 from collections import OrderedDict\n", | |||
" 599 from tabulate import tabulate\n", | |||
" 600 table_dict = {}\n", | |||
" 601 if model_type == 'regression':\n", | |||
" 602 for param_in in param_list:\n", | |||
" 603 param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n", | |||
" 604 else:\n", | |||
" 605 for param_in in param_list:\n", | |||
" 606 param_in['C'] = '{:.2e}'.format(param_in['C'])\n", | |||
" 607 table_dict['params'] = [{**param_out, **param_in}\n", | |||
" 608 for param_in in param_list for param_out in param_list_pre_revised]\n", | |||
" 609 # table_dict['gram_matrix_time'] = [\n", | |||
" 610 # '{:.2f}'.format(gram_matrix_time[index_out])\n", | |||
" 611 # for param_in in param_list\n", | |||
" 612 # for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 613 # ]\n", | |||
" 614 table_dict['valid_perf'] = [\n", | |||
" 615 '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n", | |||
" 616 std_val_scores[index_out][index_in])\n", | |||
" 617 for index_in, _ in enumerate(param_list)\n", | |||
" 618 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 619 ]\n", | |||
" 620 table_dict['test_perf'] = [\n", | |||
" 621 '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n", | |||
" 622 std_perf_scores[index_out][index_in])\n", | |||
" 623 for index_in, _ in enumerate(param_list)\n", | |||
" 624 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 625 ]\n", | |||
" 626 table_dict['train_perf'] = [\n", | |||
" 627 '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n", | |||
" 628 std_train_scores[index_out][index_in])\n", | |||
" 629 for index_in, _ in enumerate(param_list)\n", | |||
" 630 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 631 ]\n", | |||
" 632 keyorder = [\n", | |||
" 633 'params', 'train_perf', 'valid_perf', 'test_perf'\n", | |||
" 634 ]\n", | |||
" 635 print()\n", | |||
" 636 tb_print = tabulate(\n", | |||
" 637 OrderedDict(\n", | |||
" 638 sorted(table_dict.items(),\n", | |||
" 639 key=lambda i: keyorder.index(i[0]))),\n", | |||
" 640 headers='keys')\n", | |||
" 641 # print(tb_print)\n", | |||
" 642 str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n", | |||
" 643 \n", | |||
" 644 # open file to save all results for this dataset.\n", | |||
" 645 if not os.path.exists(results_dir):\n", | |||
" 646 os.makedirs(results_dir)\n", | |||
" 647 \n", | |||
" 648 # open file to save all results for this dataset.\n", | |||
" 649 145.1 MiB 0.0 MiB if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):\n", | |||
" 650 with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:\n", | |||
" 651 f.write(str_fw)\n", | |||
" 652 else:\n", | |||
" 653 145.1 MiB 0.0 MiB with open(results_dir + '/' + ds_name + '.output.txt', 'r+') as f:\n", | |||
" 654 145.1 MiB 0.0 MiB content = f.read()\n", | |||
" 655 145.1 MiB 0.0 MiB f.seek(0, 0)\n", | |||
" 656 145.1 MiB 0.0 MiB f.write(str_fw + '\\n\\n\\n' + content)\n", | |||
"\n", | |||
"\n", | |||
"\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"import functools\n", | |||
"from libs import *\n", | |||
"import multiprocessing\n", | |||
"\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
"#from pygraph.utils.model_selection_precomputed import trial_do\n", | |||
"\n", | |||
"dslist = [\n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
" 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
"# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
"# # contains single node graph, node symb\n", | |||
"# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
"# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
"# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
"# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
"# # node nsymb\n", | |||
"# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
"# # node symb/nsymb\n", | |||
"# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
"# # node/edge symb\n", | |||
"# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
"\n", | |||
" # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
" # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
" #\n", | |||
" # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
" # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
" # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"\n", | |||
" # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
" # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
"\n", | |||
" # # not working below\n", | |||
" # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
" # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n", | |||
" # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n", | |||
" # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"estimator = spkernel\n", | |||
"mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n", | |||
"param_grid_precomputed = {'node_kernels': [\n", | |||
" {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
"\n", | |||
"for ds in dslist:\n", | |||
" print()\n", | |||
" print(ds['name'])\n", | |||
" model_selection_for_precomputed_kernel(\n", | |||
" ds['dataset'],\n", | |||
" estimator,\n", | |||
" param_grid_precomputed,\n", | |||
" (param_grid[1] if ('task' in ds and ds['task']\n", | |||
" == 'regression') else param_grid[0]),\n", | |||
" (ds['task'] if 'task' in ds else 'classification'),\n", | |||
" NUM_TRIALS=30,\n", | |||
" datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n", | |||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | |||
" ds_name=ds['name'],\n", | |||
" n_jobs=multiprocessing.cpu_count(),\n", | |||
" read_gm_from_file=False)\n", | |||
" print()" | |||
] | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.7" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 2 | |||
} |
@@ -0,0 +1,177 @@ | |||
###################### log time: 2019-03-26 10:59:51. ###################### | |||
# This file contains results of spkernel on dataset Acyclic, | |||
# including gram matrices, serial numbers for gram matrix figures and performance. | |||
This is a regression problem. | |||
II. Gram matrices. | |||
the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8} is: | |||
[[1. ,0.47140452,0.33333333,...,0.30151134,0.30512858,0.27852425], | |||
[0.47140452,1. ,0. ,...,0.14213381,0.11986583,0.17232809], | |||
[0.33333333,0. ,1. ,...,0.36851387,0.37293493,0.34815531], | |||
..., | |||
[0.30151134,0.14213381,0.36851387,...,1. ,0.96429344,0.95175317], | |||
[0.30512858,0.11986583,0.37293493,...,0.96429344,1. ,0.96671243], | |||
[0.27852425,0.17232809,0.34815531,...,0.95175317,0.96671243,1. ]] | |||
1 gram matrix was calculated; 0 were ignored.
serial numbers of gram matrix figures and their corresponding parameter settings:
0: {'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8} | |||
III. Performance. | |||
best settings of hyper-params to build gram matrix: [{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8}] | |||
best settings of other hyper-params: [{'alpha': 1e-06}] | |||
best_val_perf: 9.55244065682399 | |||
best_val_std: 0.5574811966683159 | |||
final_performance: [9.724426192585643] | |||
final_confidence: [2.999822095078807] | |||
train_performance: [6.141755071354953] | |||
train_std: [0.2732168016478284] | |||
time to calculate gram matrix with different hyper-params: 16.95±nan s
time to calculate best gram matrix: 16.95±nan s
total training time with all hyper-param choices: 32.74s | |||
table of performance v.s. hyper-params: | |||
params train_perf valid_perf test_perf gram_matrix_time | |||
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ------------ ------------ ----------- ------------------ | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-10'} 6.14±0.28 9.70±0.61 9.74±3.00 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-10'} 6.13±0.27 9.75±0.74 9.74±3.03 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-09'} 6.14±0.28 9.68±0.45 9.74±3.04 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-09'} 6.14±0.28 9.75±0.55 9.76±2.99 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-08'} 6.14±0.28 9.60±0.65 9.71±2.99 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-08'} 6.14±0.27 9.74±0.64 9.74±3.00 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-07'} 6.14±0.28 9.60±0.66 9.73±2.98 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-07'} 6.14±0.28 9.77±0.65 9.77±3.07 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-06'} 6.14±0.27 9.55±0.56 9.72±3.00 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-06'} 6.13±0.27 9.79±0.61 9.73±3.04 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-05'} 6.14±0.27 9.68±0.57 9.75±3.01 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-05'} 6.14±0.27 9.75±0.57 9.70±3.02 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-04'} 6.14±0.27 9.56±0.56 9.69±2.98 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-04'} 6.15±0.27 9.62±0.65 9.70±2.97 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-03'} 6.19±0.27 9.65±0.74 9.69±2.98 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-03'} 6.36±0.27 9.73±0.46 9.71±2.92 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-02'} 6.80±0.25 9.90±0.52 9.93±2.98 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-02'} 7.63±0.25 10.33±0.57 10.29±3.01 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-01'} 9.25±0.25 11.41±0.56 11.29±2.90 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-01'} 12.42±0.25 14.03±0.34 14.06±2.65 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+00'} 17.48±0.24 18.67±0.35 19.06±2.33 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+00'} 24.52±0.21 25.24±0.31 26.11±2.41 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+01'} 34.07±0.20 34.29±0.31 35.50±4.09 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+01'} 48.90±0.28 48.62±0.40 49.78±7.09 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+02'} 75.87±0.52 75.45±0.68 76.11±9.09 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+02'} 107.85±0.80 107.50±0.87 107.80±9.36 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+03'} 128.21±0.96 127.84±1.04 128.07±9.24 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+03'} 136.81±1.03 136.43±1.13 136.63±9.17 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+04'} 139.82±1.05 139.40±1.13 139.63±9.14 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+04'} 140.80±1.05 140.41±1.07 140.61±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+05'} 141.12±1.06 140.71±1.04 140.92±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+05'} 141.22±1.06 140.84±1.12 141.02±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+06'} 141.25±1.06 140.79±1.12 141.06±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+06'} 141.26±1.06 140.87±1.06 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+07'} 141.26±1.06 140.85±1.07 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+07'} 141.26±1.06 140.79±1.05 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+08'} 141.26±1.06 140.79±1.17 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+08'} 141.26±1.06 140.86±1.08 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+09'} 141.26±1.06 140.93±1.06 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+09'} 141.26±1.06 140.85±1.13 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+10'} 141.26±1.06 140.80±1.07 141.07±9.13 16.95 | |||
###################### log time: 2019-03-26 10:58:24. ###################### | |||
# This file contains results of spkernel on dataset Acyclic, | |||
# including gram matrices, serial numbers for gram matrix figures and performance. | |||
This is a regression problem. | |||
II. Gram matrices. | |||
the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8} is: | |||
[[1. ,0.47140452,0.33333333,...,0.30151134,0.30512858,0.27852425], | |||
[0.47140452,1. ,0. ,...,0.14213381,0.11986583,0.17232809], | |||
[0.33333333,0. ,1. ,...,0.36851387,0.37293493,0.34815531], | |||
..., | |||
[0.30151134,0.14213381,0.36851387,...,1. ,0.96429344,0.95175317], | |||
[0.30512858,0.11986583,0.37293493,...,0.96429344,1. ,0.96671243], | |||
[0.27852425,0.17232809,0.34815531,...,0.95175317,0.96671243,1. ]] | |||
1 gram matrix was calculated; 0 were ignored.
serial numbers of gram matrix figures and their corresponding parameter settings:
0: {'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8} | |||
III. Performance. | |||
best settings of hyper-params to build gram matrix: [{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8}] | |||
best settings of other hyper-params: [{'alpha': 1e-07}] | |||
best_val_perf: 9.574376867060177 | |||
best_val_std: 0.7335499737848491 | |||
final_performance: [9.50365754990661] | |||
final_confidence: [2.8602395698342087] | |||
train_performance: [6.17134653357633] | |||
train_std: [0.25758350163124855] | |||
time to calculate gram matrix with different hyper-params: 1.29±nan s
time to calculate best gram matrix: 1.29±nan s
total training time with all hyper-param choices: 5.19s | |||
table of performance v.s. hyper-params: | |||
params train_perf valid_perf test_perf gram_matrix_time | |||
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ------------ ------------ ----------- ------------------ | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-10'} 6.16±0.26 9.75±0.65 9.54±2.84 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-10'} 6.16±0.26 9.75±0.66 9.53±2.90 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-09'} 6.17±0.27 9.78±0.61 9.50±2.82 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-09'} 6.16±0.26 9.79±0.56 9.53±2.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-08'} 6.17±0.26 9.70±0.58 9.52±2.84 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-08'} 6.16±0.25 9.81±0.68 9.52±2.82 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-07'} 6.17±0.26 9.57±0.73 9.50±2.86 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-07'} 6.16±0.26 9.95±0.70 9.51±2.86 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-06'} 6.17±0.26 9.81±0.58 9.54±2.88 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-06'} 6.16±0.26 9.74±0.70 9.53±2.94 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-05'} 6.17±0.26 9.71±0.61 9.54±2.92 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-05'} 6.17±0.26 9.69±0.61 9.51±2.88 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-04'} 6.17±0.26 9.72±0.70 9.50±2.79 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-04'} 6.18±0.26 9.62±0.73 9.42±2.85 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-03'} 6.21±0.26 9.91±0.52 9.40±2.78 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-03'} 6.39±0.25 9.86±0.64 9.42±2.79 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-02'} 6.83±0.25 9.94±0.56 9.59±2.80 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-02'} 7.66±0.24 10.30±0.45 9.99±2.69 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-01'} 9.28±0.24 11.38±0.36 11.02±2.55 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-01'} 12.45±0.22 14.06±0.38 13.79±2.36 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+00'} 17.53±0.21 18.74±0.31 18.88±2.23 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+00'} 24.57±0.19 25.32±0.28 26.29±2.72 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+01'} 34.07±0.22 34.30±0.34 36.29±4.52 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+01'} 48.85±0.34 48.65±0.41 51.21±7.45 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+02'} 75.76±0.57 75.36±0.60 77.93±9.56 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+02'} 107.68±0.86 107.24±0.95 109.70±9.96 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+03'} 128.01±1.04 127.59±1.03 129.96±9.91 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+03'} 136.59±1.11 136.19±1.20 138.51±9.86 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+04'} 139.60±1.14 139.22±1.11 141.51±9.84 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+04'} 140.58±1.15 140.22±1.21 142.49±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+05'} 140.89±1.15 140.48±1.14 142.80±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+05'} 140.99±1.15 140.54±1.17 142.90±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+06'} 141.02±1.15 140.61±1.20 142.93±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+06'} 141.04±1.15 140.65±1.23 142.94±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+07'} 141.04±1.15 140.66±1.20 142.94±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+07'} 141.04±1.15 140.64±1.24 142.94±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+08'} 141.04±1.15 140.65±1.14 142.95±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+08'} 141.04±1.15 140.61±1.22 142.95±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+09'} 141.04±1.15 140.58±1.15 142.95±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+09'} 141.04±1.15 140.71±1.17 142.95±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+10'} 141.04±1.15 140.68±1.11 142.95±9.83 1.29 | |||
@@ -0,0 +1,67 @@ | |||
###################### log time: 2019-03-26 11:56:19. ###################### | |||
# This file contains results of spkernel on dataset ds-unknown, | |||
# including gram matrices, serial numbers for gram matrix figures and performance. | |||
This is a regression problem. | |||
II. Gram matrices. | |||
the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1} is: | |||
[[1. ,0.47140452,0.33333333,...,0.30151134,0.30512858,0.27852425], | |||
[0.47140452,1. ,0. ,...,0.14213381,0.11986583,0.17232809], | |||
[0.33333333,0. ,1. ,...,0.36851387,0.37293493,0.34815531], | |||
..., | |||
[0.30151134,0.14213381,0.36851387,...,1. ,0.96429344,0.95175317], | |||
[0.30512858,0.11986583,0.37293493,...,0.96429344,1. ,0.96671243], | |||
[0.27852425,0.17232809,0.34815531,...,0.95175317,0.96671243,1. ]] | |||
1 gram matrices are calculated, 0 of which are ignored. | |||
serial numbers of gram matrix figures and their corresponding parameters settings: | |||
0: {'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1} | |||
III. Performance. | |||
best settings of hyper-params to build gram matrix: [{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1}] | |||
best settings of other hyper-params: [{'alpha': 0.0001}] | |||
best_val_perf: 9.922073568477266 | |||
best_val_std: 0.3829108688812842 | |||
final_performance: [8.039190309451554] | |||
final_confidence: [2.8576078550320037] | |||
train_performance: [6.285008316076738] | |||
train_std: [0.23613211181729038] | |||
time to calculate gram matrix with different hyper-params: 3.52±nans | |||
time to calculate best gram matrix: 3.52±nans | |||
total training time with all hyper-param choices: 4.34s | |||
table of performance v.s. hyper-params: | |||
params train_perf valid_perf test_perf gram_matrix_time | |||
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ------------ ------------ ------------ ------------------ | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-05'} 6.26±0.24 10.65±0.66 8.29±3.21 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-05'} 6.28±0.25 10.69±0.03 8.15±3.02 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-04'} 6.29±0.24 9.92±0.38 8.04±2.86 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-04'} 6.29±0.28 10.29±0.77 7.97±2.94 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-03'} 6.34±0.25 10.16±0.93 8.02±3.04 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-03'} 6.53±0.24 10.08±0.24 7.82±3.10 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-02'} 6.95±0.25 10.54±0.05 8.02±3.58 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-02'} 7.77±0.33 10.76±0.14 8.60±4.14 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-01'} 9.34±0.35 11.60±0.14 10.01±4.61 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-01'} 12.51±0.31 14.52±0.68 13.44±4.70 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+00'} 17.59±0.32 18.61±0.28 19.80±5.18 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+00'} 24.46±0.39 25.24±0.56 28.52±6.10 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+01'} 33.85±0.38 34.04±0.04 39.01±8.31 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+01'} 48.65±0.49 48.14±0.20 54.40±12.56 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+02'} 75.53±0.93 75.24±1.32 81.83±16.62 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+02'} 107.29±1.56 106.50±0.85 114.11±18.46 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+03'} 127.49±2.04 127.24±2.09 134.61±19.05 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+03'} 136.01±2.24 135.60±2.06 143.25±19.23 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+04'} 138.99±2.32 138.66±2.41 146.27±19.28 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+04'} 139.97±2.35 139.63±2.70 147.26±19.30 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+05'} 140.28±2.35 139.84±2.38 147.58±19.30 3.52 | |||
@@ -1,170 +0,0 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": { | |||
"autoscroll": false, | |||
"ein.tags": "worksheet-0", | |||
"slideshow": { | |||
"slide_type": "-" | |||
} | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"import numpy as np\n", | |||
"import paths\n", | |||
"\n", | |||
"import pygraph\n", | |||
"\n", | |||
"from pygraph.utils.graphfiles import loadDataset\n" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 2, | |||
"metadata": { | |||
"autoscroll": false, | |||
"ein.tags": "worksheet-0", | |||
"slideshow": { | |||
"slide_type": "-" | |||
} | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"import networkx as nx\n", | |||
"import numpy as np\n", | |||
"import matplotlib.pyplot as plt\n", | |||
"\n", | |||
"# We load a ds dataset\n", | |||
"# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n", | |||
"dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 3, | |||
"metadata": { | |||
"autoscroll": false, | |||
"ein.tags": "worksheet-0", | |||
"slideshow": { | |||
"slide_type": "-" | |||
} | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n", | |||
"100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n", | |||
"100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n", | |||
"100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"#Compute graph edit distances\n", | |||
"\n", | |||
"from tqdm import tqdm\n", | |||
"from pygraph.c_ext.lsape_binders import lsap_solverHG\n", | |||
"from pygraph.ged.costfunctions import ConstantCostFunction\n", | |||
"from pygraph.ged.GED import ged\n", | |||
"import time\n", | |||
"\n", | |||
"cf = ConstantCostFunction(1,3,1,3)\n", | |||
"N=len(dataset)\n", | |||
"\n", | |||
"methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n", | |||
"ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n", | |||
"\n", | |||
"times = list()\n", | |||
"start = time.clock()\n", | |||
"for i in tqdm(range(0,N)):\n", | |||
" for j in range(0,N):\n", | |||
" ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", | |||
"times.append(time.clock() - start)\n", | |||
"\n", | |||
"\n", | |||
"start = time.clock()\n", | |||
"for i in tqdm(range(0,N)):\n", | |||
" for j in range(0,N):\n", | |||
" ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n", | |||
"\n", | |||
"times.append(time.clock() - start)\n", | |||
"\n", | |||
"start = time.clock()\n", | |||
"for i in tqdm(range(0,N)):\n", | |||
" for j in range(0,N):\n", | |||
" ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n", | |||
"times.append(time.clock() - start)\n", | |||
"\n", | |||
"start = time.clock()\n", | |||
"for i in tqdm(range(0,N)):\n", | |||
" for j in range(0,N):\n", | |||
" ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n", | |||
"times.append(time.clock() - start)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 5, | |||
"metadata": { | |||
"autoscroll": false, | |||
"ein.tags": "worksheet-0", | |||
"slideshow": { | |||
"slide_type": "-" | |||
} | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
" method \t mean \t mean \t time\n", | |||
" Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n", | |||
" Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n", | |||
" Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n", | |||
" Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"print(\" method \\t mean \\t mean \\t time\")\n", | |||
"data = list()\n", | |||
"for i in range(0,len(ged_distances)):\n", | |||
" ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n", | |||
" print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.5.2" | |||
}, | |||
"name": "py-graph_test.ipynb" | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 2 | |||
} |
@@ -12,6 +12,109 @@ | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"Acyclic\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 183it [00:00, 5345.48it/s]\n", | |||
"calculating kernels: 16836it [00:01, 16066.90it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 183 built in 1.2855160236358643 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:03, 8.63it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 0.0001}]\n", | |||
"\n", | |||
"best_val_perf: 9.674788994813262\n", | |||
"best_val_std: 0.6229031522274688\n", | |||
"final_performance: [9.590999824754439]\n", | |||
"final_confidence: [2.911796096257332]\n", | |||
"train_performance: [6.16594412531739]\n", | |||
"train_std: [0.2739093211154806]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 1.29±nans\n", | |||
"time to calculate best gram matrix: 1.29±nans\n", | |||
"total training time with all hyper-param choices: 5.15s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"Alkane\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"\n", | |||
" 1 graphs are removed as they don't contain edges.\n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||
" keepdims=keepdims)\n", | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||
" ret = ret.dtype.type(ret / rcount)\n" | |||
] | |||
}, | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"getting sp graphs: 149it [00:00, 6510.18it/s]\n", | |||
"calculating kernels: 11175it [00:00, 18881.68it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 149 built in 0.8007419109344482 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:02, 10.52it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 3.162277660168379e-07}]\n", | |||
"\n", | |||
"best_val_perf: 8.784264102873752\n", | |||
"best_val_std: 0.2656887278835053\n", | |||
"final_performance: [8.059911355753659]\n", | |||
"final_confidence: [1.9620843656589473]\n", | |||
"train_performance: [7.8406202266920575]\n", | |||
"train_std: [0.2177862360087283]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 0.80±nans\n", | |||
"time to calculate best gram matrix: 0.80±nans\n", | |||
"total training time with all hyper-param choices: 4.02s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"MAO\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
@@ -23,17 +126,61 @@ | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 68it [00:00, 692.11it/s]\n", | |||
"calculating kernels: 2346it [00:05, 399.28it/s]\n", | |||
"getting sp graphs: 68it [00:00, 1095.77it/s]\n", | |||
"calculating kernels: 2346it [00:02, 813.63it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 68 built in 3.110588550567627 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 68 built in 6.345669507980347 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7fe240afd620>, 'nsymb': <function gaussiankernel at 0x7fe240afd9d8>, 'mix': functools.partial(<function kernelproduct at 0x7fe240aaf0d0>, <function deltakernel at 0x7fe240afd620>, <function gaussiankernel at 0x7fe240afd9d8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 7it [00:09, 4.67s/it]" | |||
"cross validation: 30it [00:02, 10.97it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'C': 3162.2776601683795}]\n", | |||
"\n", | |||
"best_val_perf: 0.8798412698412699\n", | |||
"best_val_std: 0.02062186442241262\n", | |||
"final_performance: [0.9042857142857144]\n", | |||
"final_confidence: [0.07343487734322982]\n", | |||
"train_performance: [0.9709180695847363]\n", | |||
"train_std: [0.005927396388634032]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 3.11±nans\n", | |||
"time to calculate best gram matrix: 3.11±nans\n", | |||
"total training time with all hyper-param choices: 6.21s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"PAH\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 94it [00:00, 2190.46it/s]\n", | |||
"calculating kernels: 4465it [00:05, 763.81it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 94 built in 6.083932399749756 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 0it [00:00, ?it/s]" | |||
] | |||
} | |||
], | |||
@@ -46,45 +193,46 @@ | |||
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
"#from pygraph.utils.model_selection_precomputed import trial_do\n", | |||
"\n", | |||
"# datasets\n", | |||
"dslist = [\n", | |||
"# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
"# 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
"# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
"# # contains single node graph, node symb\n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
" 'task': 'regression'}, # node symb\n", | |||
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
" # contains single node graph, node symb\n", | |||
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
"# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
"# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
"# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
"# # node nsymb\n", | |||
"# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
"# # node symb/nsymb\n", | |||
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" # node nsymb\n", | |||
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
" # node symb/nsymb\n", | |||
"\n", | |||
"# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
" # node/edge symb\n", | |||
"# # node/edge symb\n", | |||
"# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
"\n", | |||
" # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
" # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
" #\n", | |||
" # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
" # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
" # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"\n", | |||
" # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
" # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
"#\n", | |||
"# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
"# {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
"# {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
"# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
"# {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"#\n", | |||
"# {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
"# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
"# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
"# {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
"# 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
"\n", | |||
" # # not working below\n", | |||
" # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
@@ -93,12 +241,14 @@ | |||
" # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"estimator = spkernel\n", | |||
"# hyper-parameters\n", | |||
"mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n", | |||
"param_grid_precomputed = {'node_kernels': [\n", | |||
" {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
"\n", | |||
"# for each dataset, do model selection.\n", | |||
"for ds in dslist:\n", | |||
" print()\n", | |||
" print(ds['name'])\n", | |||
"    model_selection_for_precomputed_kernel(\n",
"        ds['dataset'],\n",
"        estimator,\n",
"        param_grid_precomputed,\n",
"        (param_grid[1] if ('task' in ds and ds['task']\n",
"         == 'regression') else param_grid[0]),\n",
"        (ds['task'] if 'task' in ds else 'classification'),\n",
"        NUM_TRIALS=30,\n",
"        datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n",
"        extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n",
" ds_name=ds['name'],\n", | |||
" n_jobs=multiprocessing.cpu_count(),\n", | |||
" read_gm_from_file=False)\n", | |||
" print()\n" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.\n", | |||
"[Parallel(n_jobs=8)]: Done 2 out of 9 | elapsed: 15.7s remaining: 54.8s\n", | |||
"[Parallel(n_jobs=8)]: Done 3 out of 9 | elapsed: 15.7s remaining: 31.3s\n", | |||
"[Parallel(n_jobs=8)]: Done 4 out of 9 | elapsed: 15.7s remaining: 19.6s\n", | |||
"[Parallel(n_jobs=8)]: Done 5 out of 9 | elapsed: 15.7s remaining: 12.5s\n", | |||
"[Parallel(n_jobs=8)]: Done 6 out of 9 | elapsed: 15.7s remaining: 7.8s\n", | |||
"[Parallel(n_jobs=8)]: Done 7 out of 9 | elapsed: 15.7s remaining: 4.5s\n", | |||
"[Parallel(n_jobs=8)]: Done 9 out of 9 | elapsed: 15.7s remaining: 0.0s\n" | |||
] | |||
}, | |||
{ | |||
"ename": "KeyboardInterrupt", | |||
"evalue": "", | |||
"output_type": "error", | |||
"traceback": [ | |||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |||
"\u001b[0;32m<ipython-input-1-ba0f5fe728f1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 83\u001b[0;31m \u001b[0mParallel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_cores\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdelayed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcompute_ds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mds\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdslist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |||
"\u001b[0;32m/usr/local/lib/python3.5/dist-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 960\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 961\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 962\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 963\u001b[0m \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/local/lib/python3.5/dist-packages/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 863\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 864\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 865\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 866\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/local/lib/python3.5/dist-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m 513\u001b[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m 514\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 515\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 516\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 517\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/local/lib/python3.5/dist-packages/joblib/externals/loky/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 426\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 428\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/lib/python3.5/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 293\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 294\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# # test parallel computing\n", | |||
"# import psutil\n", | |||
"# # logical=True counts threads, but we are interested in cores\n", | |||
"# psutil.()# .cpu_count(logical=False)\n", | |||
"%load_ext line_profiler\n", | |||
"%matplotlib inline\n", | |||
"import functools\n", | |||
"from libs import *\n", | |||
"from sklearn.metrics.pairwise import rbf_kernel\n", | |||
"from joblib import Parallel, delayed\n", | |||
"import multiprocessing\n", | |||
"\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"from pygraph.utils.kernels import deltakernel, kernelsum\n", | |||
"\n", | |||
"num_cores = multiprocessing.cpu_count()\n", | |||
"\n", | |||
"dslist = [ \n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',}, # unlabeled\n", | |||
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',}, # node/edge symb\n", | |||
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', \n", | |||
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',}, # contains single node graph, node symb\n", | |||
"# {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
" {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, # node/edge symb\n", | |||
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
"# {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
"# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
"# {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"\n", | |||
"# {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
" {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
"# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
"# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
"# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
"# {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
"# 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
" \n", | |||
"# # not working below\n", | |||
"# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
"# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n", | |||
"# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n", | |||
"# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"estimator = spkernel\n", | |||
"mixkernel = functools.partial(kernelsum, deltakernel, rbf_kernel)\n", | |||
"param_grid_precomputed = {'node_kernels': [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num = 41, base = 10)}, \n", | |||
" {'alpha': np.logspace(-10, 10, num = 41, base = 10)}]\n", | |||
" \n", | |||
"def compute_ds(ds):\n", | |||
" print()\n", | |||
" print(ds['name'])\n", | |||
" model_selection_for_precomputed_kernel(\n", | |||
" ds['dataset'], estimator, param_grid_precomputed, \n", | |||
" (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \n", | |||
" (ds['task'] if 'task' in ds else 'classification'), NUM_TRIALS=30,\n", | |||
" datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n", | |||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | |||
" ds_name=ds['name'])\n", | |||
" \n", | |||
"# %lprun -f spkernel \\\n", | |||
"# model_selection_for_precomputed_kernel( \\\n", | |||
"# ds['dataset'], estimator, param_grid_precomputed, \\\n", | |||
"# (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \\\n", | |||
"# (ds['task'] if 'task' in ds else 'classification'), NUM_TRIALS=30, \\\n", | |||
"# datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), \\\n", | |||
"# extra_params=(ds['extra_params'] if 'extra_params' in ds else None))\n", | |||
" print()\n", | |||
" \n", | |||
"Parallel(n_jobs=num_cores, verbose=10)(delayed(compute_ds)(ds) for ds in dslist)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"I. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"ename": "TypeError", | |||
"evalue": "'NoneType' object is not subscriptable", | |||
"output_type": "error", | |||
"traceback": [ | |||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |||
"\u001b[0;31mRemoteTraceback\u001b[0m Traceback (most recent call last)", | |||
"\u001b[0;31mRemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n File \"/usr/lib/python3.5/multiprocessing/pool.py\", line 119, in worker\n result = (True, func(*args, **kwds))\n File \"/usr/lib/python3.5/multiprocessing/pool.py\", line 44, in mapstar\n return list(map(*args))\n File \"../pygraph/kernels/spKernel.py\", line 359, in spkernel_do\n kn = node_kernels['symb']\nTypeError: 'NoneType' object is not subscriptable\n\"\"\"", | |||
"\nThe above exception was the direct cause of the following exception:\n", | |||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |||
"\u001b[0;32m<ipython-input-1-b5a6e5aa5a44>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, \n\u001b[0;32m---> 15\u001b[0;31m 'regression', NUM_TRIALS=30)\n\u001b[0m", | |||
"\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/model_selection_precomputed.py\u001b[0m in \u001b[0;36mmodel_selection_for_precomputed_kernel\u001b[0;34m(datafile, estimator, param_grid_precomputed, param_grid, model_type, NUM_TRIALS, datafile_y, extra_params, ds_name, n_jobs)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams_out\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparam_list_precomputed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0mparams_out\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'n_jobs'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 122\u001b[0;31m \u001b[0mrtn_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mparams_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 123\u001b[0m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrtn_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[0mcurrent_run_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrtn_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(node_label, edge_weight, node_kernels, n_jobs, *args)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0mdo_partial\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpartial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspkernel_do\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mds_attrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode_label\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode_kernels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mitr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcombinations_with_replacement\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 99\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpool\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdo_partial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtotal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 100\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/lib/python3.5/multiprocessing/pool.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, func, iterable, chunksize)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ma\u001b[0m \u001b[0mlist\u001b[0m \u001b[0mthat\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mreturned\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m '''\n\u001b[0;32m--> 260\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_map_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miterable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmapstar\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 261\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstarmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miterable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/lib/python3.5/multiprocessing/pool.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 606\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 607\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 608\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 609\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 610\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;31mTypeError\u001b[0m: 'NoneType' object is not subscriptable" | |||
] | |||
}, | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"Process ForkPoolWorker-1:\n", | |||
"Traceback (most recent call last):\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/process.py\", line 249, in _bootstrap\n", | |||
" self.run()\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/process.py\", line 93, in run\n", | |||
" self._target(*self._args, **self._kwargs)\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/pool.py\", line 108, in worker\n", | |||
" task = get()\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/queues.py\", line 343, in get\n", | |||
" res = self._reader.recv_bytes()\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/connection.py\", line 216, in recv_bytes\n", | |||
" buf = self._recv_bytes(maxlength)\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/connection.py\", line 407, in _recv_bytes\n", | |||
" buf = self._recv(4)\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/connection.py\", line 379, in _recv\n", | |||
" chunk = read(handle, remaining)\n", | |||
"KeyboardInterrupt\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"%load_ext line_profiler\n", | |||
"%matplotlib inline\n", | |||
"import numpy as np\n", | |||
"import sys\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"\n", | |||
"datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", | |||
"estimator = spkernel\n", | |||
"param_grid_precomputed = {}\n", | |||
"param_grid = {'alpha': np.logspace(-1, 1, num = 41, base = 10)}\n", | |||
"\n", | |||
"model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, \n", | |||
" 'regression', NUM_TRIALS=30)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
" --- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
" Loading dataset from file...\n", | |||
"\n", | |||
" Calculating kernel matrix, this could take a while...\n", | |||
"--- shortest path kernel matrix of size 185 built in 13.3865065574646 seconds ---\n", | |||
"[[ 3. 1. 3. ... 1. 1. 1.]\n", | |||
" [ 1. 6. 1. ... 0. 0. 3.]\n", | |||
" [ 3. 1. 3. ... 1. 1. 1.]\n", | |||
" ...\n", | |||
" [ 1. 0. 1. ... 55. 21. 7.]\n", | |||
" [ 1. 0. 1. ... 21. 55. 7.]\n", | |||
" [ 1. 3. 1. ... 7. 7. 55.]]\n", | |||
"\n", | |||
" Starting calculate accuracy/rmse...\n", | |||
"calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 757.54it/s]\n", | |||
" Mean performance on train set: 28.360361\n", | |||
"With standard deviation: 1.357183\n", | |||
"\n", | |||
" Mean performance on test set: 35.191954\n", | |||
"With standard deviation: 4.495767\n", | |||
"calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 771.22it/s]\n", | |||
"\n", | |||
"\n", | |||
" rmse_test std_test rmse_train std_train k_time\n", | |||
"----------- ---------- ------------ ----------- --------\n", | |||
" 35.192 4.49577 28.3604 1.35718 13.3865\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"%load_ext line_profiler\n", | |||
"\n", | |||
"import sys\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"from pygraph.utils.utils import kernel_train_test\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"\n", | |||
"datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", | |||
"kernel_file_path = 'kernelmatrices_path_acyclic/'\n", | |||
"\n", | |||
"kernel_para = dict(edge_weight = 'atom')\n", | |||
"\n", | |||
"kernel_train_test(datafile, kernel_file_path, spkernel, kernel_para, normalize = False)\n", | |||
"\n", | |||
"# %lprun -f spkernel \\\n", | |||
"# kernel_train_test(datafile, kernel_file_path, spkernel, kernel_para, normalize = False)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# results\n", | |||
"\n", | |||
"# with y normalization\n", | |||
"#   RMSE_test   std_test   RMSE_train   std_train     k_time\n",
"# -----------  ---------  ------------  -----------  --------\n",
"#     35.6337    5.23183      32.3805     3.92531    14.9301\n",
"\n",
"# without y normalization\n",
"#   RMSE_test   std_test   RMSE_train   std_train     k_time\n",
"# -----------  ---------  ------------  -----------  --------\n",
"#      35.192    4.49577      28.3604     1.35718    14.5768"
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 5, | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"- This script take as input a kernel matrix\n", | |||
"and returns the classification or regression performance\n", | |||
"- The kernel matrix can be calculated using any of the graph kernels approaches\n", | |||
"- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", | |||
"- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", | |||
"then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", | |||
"provide the corresponding performance on the test set. If more than one split is performed, the final results \n", | |||
"correspond to the average of the performances on the test sets. \n", | |||
"\n", | |||
"@references\n", | |||
" https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", | |||
"\n", | |||
"\n", | |||
" Loading dataset from file...\n", | |||
"[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", | |||
" 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", | |||
" 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", | |||
" 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", | |||
" 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", | |||
" 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", | |||
" 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", | |||
" 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", | |||
" 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", | |||
" 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", | |||
" 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", | |||
" 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", | |||
" 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", | |||
" 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", | |||
" 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", | |||
" 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", | |||
" 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", | |||
" 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", | |||
"\n", | |||
" Loading the matrix from file...\n", | |||
"[[ 3. 1. 3. ..., 1. 1. 1.]\n", | |||
" [ 1. 6. 1. ..., 0. 0. 3.]\n", | |||
" [ 3. 1. 3. ..., 1. 1. 1.]\n", | |||
" ..., \n", | |||
" [ 1. 0. 1. ..., 55. 21. 7.]\n", | |||
" [ 1. 0. 1. ..., 21. 55. 7.]\n", | |||
" [ 1. 3. 1. ..., 7. 7. 55.]]\n", | |||
"\n", | |||
" --- This is a regression problem ---\n", | |||
"\n", | |||
" Starting split 10...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 40.422382\n", | |||
"The corresponding performance on test set is: 47.424532\n", | |||
"\n", | |||
" Starting split 11...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 33.084913\n", | |||
"The corresponding performance on test set is: 35.493699\n", | |||
"\n", | |||
" Starting split 12...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 31.306710\n", | |||
"The corresponding performance on test set is: 33.173366\n", | |||
"\n", | |||
" Starting split 13...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 43.500424\n", | |||
"The corresponding performance on test set is: 32.633129\n", | |||
"\n", | |||
" Starting split 14...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 10 with parameter alpha = 1.000000\n", | |||
"The best performance on the validation set is: 53.561752\n", | |||
"The corresponding performance on test set is: 42.883548\n", | |||
"\n", | |||
" Starting split 15...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 40.444773\n", | |||
"The corresponding performance on test set is: 32.713040\n", | |||
"\n", | |||
" Starting split 16...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 11 with parameter alpha = 10.000000\n", | |||
"The best performance on the validation set is: 37.046818\n", | |||
"The corresponding performance on test set is: 37.337851\n", | |||
"\n", | |||
" Starting split 17...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 39.907628\n", | |||
"The corresponding performance on test set is: 38.889064\n", | |||
"\n", | |||
" Starting split 18...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 29.879950\n", | |||
"The corresponding performance on test set is: 27.652558\n", | |||
"\n", | |||
" Starting split 19...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 11 with parameter alpha = 10.000000\n", | |||
"The best performance on the validation set is: 44.911892\n", | |||
"The corresponding performance on test set is: 35.804454\n", | |||
"\n", | |||
" Mean performance on val set: 39.406724\n", | |||
"With standard deviation: 6.720820\n", | |||
"\n", | |||
" Mean performance on test set: 36.400524\n", | |||
"With standard deviation: 5.352940\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# Author: Elisabetta Ghisu\n", | |||
"\n", | |||
"\"\"\"\n", | |||
"- This script take as input a kernel matrix\n", | |||
"and returns the classification or regression performance\n", | |||
"- The kernel matrix can be calculated using any of the graph kernels approaches\n", | |||
"- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", | |||
"- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", | |||
"then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", | |||
"provide the corresponding performance on the test set. If more than one split is performed, the final results \n", | |||
"correspond to the average of the performances on the test sets. \n", | |||
"\n", | |||
"@references\n", | |||
" https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", | |||
"\"\"\"\n", | |||
"\n", | |||
"print(__doc__)\n", | |||
"\n", | |||
"import sys\n", | |||
"import pathlib\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"from tabulate import tabulate\n", | |||
"\n", | |||
"import random\n", | |||
"import numpy as np\n", | |||
"import matplotlib.pyplot as plt\n", | |||
"\n", | |||
"from sklearn.kernel_ridge import KernelRidge # 0.17\n", | |||
"from sklearn.metrics import accuracy_score, mean_squared_error\n", | |||
"from sklearn import svm\n", | |||
"\n", | |||
"from pygraph.kernels.spkernel import spkernel\n", | |||
"from pygraph.utils.graphfiles import loadDataset\n", | |||
"\n", | |||
"print('\\n Loading dataset from file...')\n", | |||
"dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", | |||
"y = np.array(y)\n", | |||
"print(y)\n", | |||
"\n", | |||
"kernel_file_path = 'kernelmatrix.ds'\n", | |||
"path = pathlib.Path(kernel_file_path)\n", | |||
"if path.is_file():\n", | |||
" print('\\n Loading the matrix from file...')\n", | |||
" Kmatrix = np.loadtxt(kernel_file_path)\n", | |||
" print(Kmatrix)\n", | |||
"else:\n", | |||
" print('\\n Calculating kernel matrix, this could take a while...')\n", | |||
" #@Q: is it appropriate to use bond type between atoms as the edge weight to calculate shortest path????????\n", | |||
" Kmatrix, run_time = spkernel(dataset, edge_weight = 'bond_type')\n", | |||
" print(Kmatrix)\n", | |||
" print('Saving kernel matrix to file...')\n", | |||
" np.savetxt(kernel_file_path, Kmatrix)\n", | |||
"\n", | |||
"# setup the parameters\n", | |||
"model_type = 'regression' # Regression or classification problem\n", | |||
"print('\\n --- This is a %s problem ---' % model_type)\n", | |||
"\n", | |||
"datasize = len(dataset)\n", | |||
"trials = 21 # Trials for hyperparameters random search\n", | |||
"splits = 10 # Number of splits of the data\n", | |||
"alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", | |||
"C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", | |||
"random.seed(20) # Set the seed for uniform parameter distribution\n", | |||
"\n", | |||
"\n", | |||
"\"\"\"\n", | |||
"- Here starts the main program\n", | |||
"- First we permute the data, then for each split we evaluate corresponding performances\n", | |||
"- In the end, the performances are averaged over the test sets\n", | |||
"\"\"\"\n", | |||
"\n", | |||
"# Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", | |||
"val_split = []\n", | |||
"test_split = []\n", | |||
"\n", | |||
"# For each split of the data\n", | |||
"for j in range(10, 10 + splits):\n", | |||
" print('\\n Starting split %d...' % j)\n", | |||
"\n", | |||
" # Set the random set for data permutation\n", | |||
" random_state = int(j)\n", | |||
" np.random.seed(random_state)\n", | |||
" idx_perm = np.random.permutation(datasize)\n", | |||
"# print(idx_perm)\n", | |||
" \n", | |||
" # Permute the data\n", | |||
" y_perm = y[idx_perm] # targets permutation\n", | |||
"# print(y_perm)\n", | |||
" Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", | |||
"# print(Kmatrix_perm)\n", | |||
" Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", | |||
" \n", | |||
" # Set the training, validation and test\n", | |||
" # Note: the percentage can be set up by the user\n", | |||
" num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", | |||
" num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", | |||
" num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", | |||
" num_val = num_train_val - num_train # 10% (of train + val) for validation\n", | |||
" \n", | |||
" # Split the kernel matrix\n", | |||
" Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", | |||
" Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", | |||
" Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", | |||
"\n", | |||
" # Split the targets\n", | |||
" y_train = y_perm[0:num_train]\n", | |||
"\n", | |||
" # Normalization step (for real valued targets only)\n", | |||
" print('\\n Normalizing output y...')\n", | |||
" if model_type == 'regression':\n", | |||
" y_train_mean = np.mean(y_train)\n", | |||
" y_train_std = np.std(y_train)\n", | |||
" y_train = (y_train - y_train_mean) / float(y_train_std)\n", | |||
"# print(y)\n", | |||
" \n", | |||
" y_val = y_perm[num_train:(num_train + num_val)]\n", | |||
" y_test = y_perm[(num_train + num_val):datasize]\n", | |||
" \n", | |||
" # Record the performance for each parameter trial respectively on validation and test set\n", | |||
" perf_all_val = []\n", | |||
" perf_all_test = []\n", | |||
" \n", | |||
" # For each parameter trial\n", | |||
" for i in range(trials):\n", | |||
" # For regression use the Kernel Ridge method\n", | |||
" if model_type == 'regression':\n", | |||
"# print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", | |||
"\n", | |||
" # Fit the kernel ridge model\n", | |||
" KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", | |||
"# KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", | |||
" KR.fit(Kmatrix_train, y_train)\n", | |||
"\n", | |||
" # predict on the validation and test set\n", | |||
" y_pred = KR.predict(Kmatrix_val)\n", | |||
" y_pred_test = KR.predict(Kmatrix_test)\n", | |||
"# print(y_pred)\n", | |||
"\n", | |||
" # adjust prediction: needed because the training targets have been normalizaed\n", | |||
" y_pred = y_pred * float(y_train_std) + y_train_mean\n", | |||
"# print(y_pred)\n", | |||
" y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", | |||
"# print(y_pred_test)\n", | |||
"\n", | |||
" # root mean squared error on validation\n", | |||
" rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", | |||
" perf_all_val.append(rmse)\n", | |||
"\n", | |||
" # root mean squared error in test \n", | |||
" rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", | |||
" perf_all_test.append(rmse_test)\n", | |||
"\n", | |||
"# print('The performance on the validation set is: %3f' % rmse)\n", | |||
"# print('The performance on the test set is: %3f' % rmse_test)\n", | |||
" \n", | |||
" # --- FIND THE OPTIMAL PARAMETERS --- #\n", | |||
" # For regression: minimise the mean squared error\n", | |||
" if model_type == 'regression':\n", | |||
"\n", | |||
" # get optimal parameter on validation (argmin mean squared error)\n", | |||
" min_idx = np.argmin(perf_all_test)\n", | |||
" alpha_opt = alpha_grid[min_idx]\n", | |||
"\n", | |||
" # performance corresponding to optimal parameter on val\n", | |||
" perf_val_opt = perf_all_val[min_idx]\n", | |||
"\n", | |||
" # corresponding performance on test for the same parameter\n", | |||
" perf_test_opt = perf_all_test[min_idx]\n", | |||
"\n", | |||
" print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", | |||
" print('The best performance on the validation set is: %3f' % perf_val_opt)\n", | |||
" print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", | |||
"\n", | |||
" # append the best performance on validation\n", | |||
" # at the current split\n", | |||
" val_split.append(perf_val_opt)\n", | |||
"\n", | |||
" # append the correponding performance on the test set\n", | |||
" test_split.append(perf_test_opt)\n", | |||
"\n", | |||
"# average the results\n", | |||
"# mean of the validation performances over the splits\n", | |||
"val_mean = np.mean(np.asarray(val_split))\n", | |||
"# std deviation of validation over the splits\n", | |||
"val_std = np.std(np.asarray(val_split))\n", | |||
"\n", | |||
"# mean of the test performances over the splits\n", | |||
"test_mean = np.mean(np.asarray(test_split))\n", | |||
"# std deviation of the test oer the splits\n", | |||
"test_std = np.std(np.asarray(test_split))\n", | |||
"\n", | |||
"print('\\n Mean performance on val set: %3f' % val_mean)\n", | |||
"print('With standard deviation: %3f' % val_std)\n", | |||
"print('\\n Mean performance on test set: %3f' % test_mean)\n", | |||
"print('With standard deviation: %3f' % test_std)" | |||
] | |||
} | |||
], | |||
@@ -749,7 +285,7 @@ | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.6" | |||
"version": "3.6.7" | |||
} | |||
}, | |||
"nbformat": 4, | |||
@@ -2,7 +2,7 @@ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"execution_count": null, | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
@@ -12,9 +12,9 @@ | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"MAO\n", | |||
"Acyclic\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
@@ -23,162 +23,47 @@ | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting shortest paths: 68it [00:00, 629.46it/s]\n", | |||
"calculating kernels: 2346it [00:22, 102.31it/s]\n", | |||
"getting shortest paths: 183it [00:00, 5316.42it/s]\n", | |||
"calculating kernels: 16836it [00:03, 4625.84it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 183 built in 3.8611345291137695 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 68 built in 23.390946626663208 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'edge_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 0%| | 0/30 [00:00<?, ?it/s]0 0\n", | |||
"params_in: {'C': 1e-10}\n", | |||
"0 1\n", | |||
"params_in: {'C': 3.1622776601683795e-10}\n", | |||
"0 2\n", | |||
"params_in: {'C': 1e-09}\n", | |||
"0 3\n", | |||
"params_in: {'C': 3.1622776601683795e-09}\n", | |||
"0 4\n", | |||
"params_in: {'C': 1e-08}\n", | |||
"0 5\n", | |||
"params_in: {'C': 3.162277660168379e-08}\n", | |||
"0 6\n", | |||
"params_in: {'C': 1e-07}\n", | |||
"0 7\n", | |||
"params_in: {'C': 3.162277660168379e-07}\n", | |||
"0 8\n", | |||
"params_in: {'C': 1e-06}\n", | |||
"0 9\n", | |||
"params_in: {'C': 3.162277660168379e-06}\n", | |||
"0 10\n", | |||
"params_in: {'C': 1e-05}\n", | |||
"0 11\n", | |||
"params_in: {'C': 3.1622776601683795e-05}\n", | |||
"0 12\n", | |||
"params_in: {'C': 0.0001}\n", | |||
"0 13\n", | |||
"params_in: {'C': 0.00031622776601683794}\n", | |||
"0 14\n", | |||
"params_in: {'C': 0.001}\n", | |||
"0 15\n", | |||
"params_in: {'C': 0.0031622776601683794}\n", | |||
"0 16\n", | |||
"params_in: {'C': 0.01}\n", | |||
"0 17\n", | |||
"params_in: {'C': 0.03162277660168379}\n", | |||
"0 18\n", | |||
"params_in: {'C': 0.1}\n", | |||
"0 19\n", | |||
"params_in: {'C': 0.31622776601683794}\n", | |||
"0 20\n", | |||
"params_in: {'C': 1.0}\n", | |||
"0 21\n", | |||
"params_in: {'C': 3.1622776601683795}\n", | |||
"0 22\n", | |||
"params_in: {'C': 10.0}\n", | |||
"0 23\n", | |||
"params_in: {'C': 31.622776601683793}\n", | |||
"0 24\n", | |||
"params_in: {'C': 100.0}\n", | |||
"0 25\n", | |||
"params_in: {'C': 316.22776601683796}\n", | |||
"0 26\n", | |||
"params_in: {'C': 1000.0}\n", | |||
"0 27\n", | |||
"params_in: {'C': 3162.2776601683795}\n", | |||
"0 28\n", | |||
"params_in: {'C': 10000.0}\n", | |||
"0 29\n", | |||
"params_in: {'C': 31622.776601683792}\n", | |||
"0 30\n", | |||
"params_in: {'C': 100000.0}\n", | |||
"0 31\n", | |||
"params_in: {'C': 316227.7660168379}\n", | |||
"0 32\n", | |||
"params_in: {'C': 1000000.0}\n", | |||
"0 33\n", | |||
"params_in: {'C': 3162277.6601683795}\n", | |||
"0 34\n", | |||
"params_in: {'C': 10000000.0}\n", | |||
"0 35\n", | |||
"params_in: {'C': 31622776.60168379}\n", | |||
"0 36\n", | |||
"params_in: {'C': 100000000.0}\n", | |||
"0 37\n", | |||
"params_in: {'C': 316227766.01683795}\n", | |||
"0 38\n", | |||
"params_in: {'C': 1000000000.0}\n", | |||
"0 39\n", | |||
"params_in: {'C': 3162277660.1683793}\n", | |||
"0 40\n", | |||
"params_in: {'C': 10000000000.0}\n", | |||
"val_pref: [[0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.55952381 0.71666667 0.81666667 0.81666667\n", | |||
" 0.83571429 0.86666667 0.9 0.9 0.9 0.9\n", | |||
" 0.9 0.9 0.9 0.9 0.9 0.9\n", | |||
" 0.9 0.9 0.9 0.9 0.9 ]]\n", | |||
"test_pref: [[0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.61428571 0.84285714 0.84285714 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286]]\n", | |||
"cross validation: 100%|██████████| 30/30 [00:11<00:00, 2.75it/s]\n", | |||
"\n", | |||
"cross validation: 30it [00:03, 8.71it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"val_pref: [0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.55952381 0.71666667 0.81666667 0.81666667\n", | |||
" 0.83571429 0.86666667 0.9 0.9 0.9 0.9\n", | |||
" 0.9 0.9 0.9 0.9 0.9 0.9\n", | |||
" 0.9 0.9 0.9 0.9 0.9 ]\n", | |||
"test_pref: [0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.61428571 0.84285714 0.84285714 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286]\n", | |||
"average_val_scores: [[0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n", | |||
" 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n", | |||
" 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n", | |||
" 0.55301587 0.55468254 0.61507937 0.71777778 0.78039683 0.80531746\n", | |||
" 0.86198413 0.89531746 0.89420635 0.87190476 0.85761905 0.85761905\n", | |||
" 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905\n", | |||
" 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905]]\n", | |||
"best_val_perf: 0.8953174603174604\n", | |||
"\n", | |||
"best_params_out: [{'edge_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'C': 316.22776601683796}]\n", | |||
"\n", | |||
"best_val_perf: 0.8953174603174604\n", | |||
"best_val_std: 0.029090007386146643\n", | |||
"(array([0]), array([25]))\n", | |||
"[0]\n", | |||
"[[0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n", | |||
" 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n", | |||
" 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n", | |||
" 0.5047619 0.49761905 0.66 0.75857143 0.78857143 0.82857143\n", | |||
" 0.85285714 0.86380952 0.84428571 0.82190476 0.81571429 0.81571429\n", | |||
" 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429\n", | |||
" 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429]]\n", | |||
"final_performance: [0.8638095238095236]\n", | |||
"final_confidence: [0.10509426306201483]\n", | |||
"train_performance: [0.9857934904601572]\n", | |||
"train_std: [0.00730576290039335]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 23.39±nans\n", | |||
"time to calculate best gram matrix: 23.39±nans\n", | |||
"total training time with all hyper-param choices: 34.88s\n", | |||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 0.0031622776601683794}]\n", | |||
"\n", | |||
"best_val_perf: 12.673707811197355\n", | |||
"best_val_std: 0.8773195213759171\n", | |||
"final_performance: [12.972668262063593]\n", | |||
"final_confidence: [3.7642237202379087]\n", | |||
"train_performance: [3.934708519599526]\n", | |||
"train_std: [0.16225809646161615]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 3.86±nans\n", | |||
"time to calculate best gram matrix: 3.86±nans\n", | |||
"total training time with all hyper-param choices: 7.74s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"Alkane\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n" | |||
] | |||
}, | |||
@@ -191,17 +76,112 @@ | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||
" ret = ret.dtype.type(ret / rcount)\n" | |||
] | |||
}, | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"getting shortest paths: 150it [00:00, 8822.07it/s]\n", | |||
"calculating kernels: 11325it [00:02, 5167.04it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 150 built in 2.394453525543213 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:02, 10.78it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 0.1}]\n", | |||
"\n", | |||
"best_val_perf: 11.082918177885857\n", | |||
"best_val_std: 0.3037589925734673\n", | |||
"final_performance: [7.8261546009779925]\n", | |||
"final_confidence: [1.59375970943081]\n", | |||
"train_performance: [7.988630946761633]\n", | |||
"train_std: [0.16054607648943253]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 2.39±nans\n", | |||
"time to calculate best gram matrix: 2.39±nans\n", | |||
"total training time with all hyper-param choices: 5.49s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"MAO\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting shortest paths: 68it [00:00, 567.53it/s]\n", | |||
"calculating kernels: 2346it [00:14, 161.71it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 68 built in 14.833482265472412 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:02, 13.38it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'C': 1000.0}]\n", | |||
"\n", | |||
"best_val_perf: 0.9084126984126983\n", | |||
"best_val_std: 0.027912022159840448\n", | |||
"final_performance: [0.9085714285714286]\n", | |||
"final_confidence: [0.0879511091875412]\n", | |||
"train_performance: [0.9679438832772166]\n", | |||
"train_std: [0.00754192133247499]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 14.83±nans\n", | |||
"time to calculate best gram matrix: 14.83±nans\n", | |||
"total training time with all hyper-param choices: 17.42s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"PAH\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting shortest paths: 94it [00:00, 447.28it/s]\n", | |||
"calculating kernels: 4465it [01:04, 68.94it/s] \n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 94 built in 65.20552921295166 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 0it [00:00, ?it/s]" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"#!/usr/bin/env python3\n", | |||
"# -*- coding: utf-8 -*-\n", | |||
"\"\"\"\n", | |||
"Created on Fri Sep 28 16:37:29 2018\n", | |||
"\n", | |||
"@author: ljia\n", | |||
"\"\"\"\n", | |||
"\n", | |||
"import functools\n", | |||
"from libs import *\n", | |||
"import multiprocessing\n", | |||
@@ -210,19 +190,19 @@ | |||
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
"\n", | |||
"dslist = [\n", | |||
"# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
"# 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
"# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
"# # contains single node graph, node symb\n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
" 'task': 'regression'}, # node symb\n", | |||
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
" # contains single node graph, node symb\n", | |||
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
"# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
"# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
"# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" # node nsymb\n", | |||
"# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
"# # node symb/nsymb\n", | |||
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
" # node symb/nsymb\n", | |||
"# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
"# # node/edge symb\n", | |||
"# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
@@ -260,7 +240,8 @@ | |||
"param_grid_precomputed = {'node_kernels': \n", | |||
" [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],\n", | |||
" 'edge_kernels': \n", | |||
" [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
" [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],\n", | |||
" 'compute_method': ['naive']}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
"\n", | |||
@@ -300,7 +281,7 @@ | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.6" | |||
"version": "3.6.7" | |||
} | |||
}, | |||
"nbformat": 4, | |||
@@ -14,17 +14,17 @@ from pygraph.kernels.structuralspKernel import structuralspkernel | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
# 'task': 'regression'}, # node symb | |||
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
'task': 'regression'}, # node symb | |||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# contains single node graph, node symb | |||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
@@ -0,0 +1,821 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"Acyclic\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 183it [00:00, 1871.37it/s]\n", | |||
"calculating kernels: 16836it [00:16, 1014.42it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 183 built in 16.947543382644653 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:12, 2.03it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 1e-06}]\n", | |||
"\n", | |||
"best_val_perf: 9.55244065682399\n", | |||
"best_val_std: 0.5574811966683159\n", | |||
"final_performance: [9.724426192585643]\n", | |||
"final_confidence: [2.999822095078807]\n", | |||
"train_performance: [6.141755071354953]\n", | |||
"train_std: [0.2732168016478284]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 16.95±nans\n", | |||
"time to calculate best gram matrix: 16.95±nans\n", | |||
"total training time with all hyper-param choices: 32.74s\n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||
" keepdims=keepdims)\n", | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||
" ret = ret.dtype.type(ret / rcount)\n" | |||
] | |||
}, | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Filename: ../../pygraph/utils/model_selection_precomputed.py\n", | |||
"\n", | |||
"Line # Mem usage Increment Line Contents\n", | |||
"================================================\n", | |||
" 24 115.2 MiB 115.2 MiB @profile\n", | |||
" 25 def model_selection_for_precomputed_kernel(datafile,\n", | |||
" 26 estimator,\n", | |||
" 27 param_grid_precomputed,\n", | |||
" 28 param_grid,\n", | |||
" 29 model_type,\n", | |||
" 30 NUM_TRIALS=30,\n", | |||
" 31 datafile_y=None,\n", | |||
" 32 extra_params=None,\n", | |||
" 33 ds_name='ds-unknown',\n", | |||
" 34 n_jobs=1,\n", | |||
" 35 read_gm_from_file=False):\n", | |||
" 36 \"\"\"Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results.\n", | |||
" 37 \n", | |||
" 38 Parameters\n", | |||
" 39 ----------\n", | |||
" 40 datafile : string\n", | |||
" 41 Path of dataset file.\n", | |||
" 42 estimator : function\n", | |||
" 43 kernel function used to estimate. This function needs to return a gram matrix.\n", | |||
" 44 param_grid_precomputed : dictionary\n", | |||
" 45 Dictionary with names (string) of parameters used to calculate gram matrices as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n", | |||
" 46 param_grid : dictionary\n", | |||
" 47 Dictionary with names (string) of parameters used as penelties as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n", | |||
" 48 model_type : string\n", | |||
" 49 Typr of the problem, can be regression or classification.\n", | |||
" 50 NUM_TRIALS : integer\n", | |||
" 51 Number of random trials of outer cv loop. The default is 30.\n", | |||
" 52 datafile_y : string\n", | |||
" 53 Path of file storing y data. This parameter is optional depending on the given dataset file.\n", | |||
" 54 read_gm_from_file : boolean\n", | |||
" 55 Whether gram matrices are loaded from file.\n", | |||
" 56 \n", | |||
" 57 Examples\n", | |||
" 58 --------\n", | |||
" 59 >>> import numpy as np\n", | |||
" 60 >>> import sys\n", | |||
" 61 >>> sys.path.insert(0, \"../\")\n", | |||
" 62 >>> from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel\n", | |||
" 63 >>> from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", | |||
" 64 >>>\n", | |||
" 65 >>> datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", | |||
" 66 >>> estimator = weisfeilerlehmankernel\n", | |||
" 67 >>> param_grid_precomputed = {'height': [0,1,2,3,4,5,6,7,8,9,10], 'base_kernel': ['subtree']}\n", | |||
" 68 >>> param_grid = {\"alpha\": np.logspace(-2, 2, num = 10, base = 10)}\n", | |||
" 69 >>>\n", | |||
" 70 >>> model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, 'regression')\n", | |||
" 71 \"\"\"\n", | |||
" 72 115.2 MiB 0.0 MiB tqdm.monitor_interval = 0\n", | |||
" 73 \n", | |||
" 74 115.2 MiB 0.0 MiB results_dir = '../notebooks/results/' + estimator.__name__\n", | |||
" 75 115.2 MiB 0.0 MiB if not os.path.exists(results_dir):\n", | |||
" 76 os.makedirs(results_dir)\n", | |||
" 77 # a string to save all the results.\n", | |||
" 78 115.2 MiB 0.0 MiB str_fw = '###################### log time: ' + datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\") + '. ######################\\n\\n'\n", | |||
" 79 115.2 MiB 0.0 MiB str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\\n# including gram matrices, serial numbers for gram matrix figures and performance.\\n\\n'\n", | |||
" 80 \n", | |||
" 81 # setup the model type\n", | |||
" 82 115.2 MiB 0.0 MiB model_type = model_type.lower()\n", | |||
" 83 115.2 MiB 0.0 MiB if model_type != 'regression' and model_type != 'classification':\n", | |||
" 84 raise Exception(\n", | |||
" 85 'The model type is incorrect! Please choose from regression or classification.'\n", | |||
" 86 )\n", | |||
" 87 115.2 MiB 0.0 MiB print()\n", | |||
" 88 115.2 MiB 0.0 MiB print('--- This is a %s problem ---' % model_type)\n", | |||
" 89 115.2 MiB 0.0 MiB str_fw += 'This is a %s problem.\\n' % model_type\n", | |||
" 90 \n", | |||
" 91 # calculate gram matrices rather than read them from file.\n", | |||
" 92 115.2 MiB 0.0 MiB if read_gm_from_file == False:\n", | |||
" 93 # Load the dataset\n", | |||
" 94 115.2 MiB 0.0 MiB print()\n", | |||
" 95 115.2 MiB 0.0 MiB print('\\n1. Loading dataset from file...')\n", | |||
" 96 115.2 MiB 0.0 MiB if isinstance(datafile, str):\n", | |||
" 97 115.2 MiB 0.0 MiB dataset, y_all = loadDataset(\n", | |||
" 98 116.3 MiB 1.1 MiB datafile, filename_y=datafile_y, extra_params=extra_params)\n", | |||
" 99 else: # load data directly from variable.\n", | |||
" 100 dataset = datafile\n", | |||
" 101 y_all = datafile_y \n", | |||
" 102 \n", | |||
" 103 # import matplotlib.pyplot as plt\n", | |||
" 104 # import networkx as nx\n", | |||
" 105 # nx.draw_networkx(dataset[30])\n", | |||
" 106 # plt.show()\n", | |||
" 107 \n", | |||
" 108 # Grid of parameters with a discrete number of values for each.\n", | |||
" 109 116.3 MiB 0.0 MiB param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", | |||
" 110 116.3 MiB 0.0 MiB param_list = list(ParameterGrid(param_grid))\n", | |||
" 111 \n", | |||
" 112 116.3 MiB 0.0 MiB gram_matrices = [\n", | |||
" 113 ] # a list to store gram matrices for all param_grid_precomputed\n", | |||
" 114 116.3 MiB 0.0 MiB gram_matrix_time = [\n", | |||
" 115 ] # a list to store time to calculate gram matrices\n", | |||
" 116 116.3 MiB 0.0 MiB param_list_pre_revised = [\n", | |||
" 117 ] # list to store param grids precomputed ignoring the useless ones\n", | |||
" 118 \n", | |||
" 119 # calculate all gram matrices\n", | |||
" 120 116.3 MiB 0.0 MiB print()\n", | |||
" 121 116.3 MiB 0.0 MiB print('2. Calculating gram matrices. This could take a while...')\n", | |||
" 122 116.3 MiB 0.0 MiB str_fw += '\\nII. Gram matrices.\\n\\n'\n", | |||
" 123 116.3 MiB 0.0 MiB tts = time.time() # start training time\n", | |||
" 124 116.3 MiB 0.0 MiB nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN)\n", | |||
" 125 145.3 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed):\n", | |||
" 126 116.3 MiB 0.0 MiB y = y_all[:]\n", | |||
" 127 116.3 MiB 0.0 MiB params_out['n_jobs'] = n_jobs\n", | |||
" 128 # print(dataset)\n", | |||
" 129 # import networkx as nx\n", | |||
" 130 # nx.draw_networkx(dataset[1])\n", | |||
" 131 # plt.show()\n", | |||
" 132 119.5 MiB 3.1 MiB rtn_data = estimator(dataset[:], **params_out)\n", | |||
" 133 119.5 MiB 0.0 MiB Kmatrix = rtn_data[0]\n", | |||
" 134 119.5 MiB 0.0 MiB current_run_time = rtn_data[1]\n", | |||
" 135 # for some kernels, some graphs in datasets may not meet the \n", | |||
" 136 # kernels' requirements for graph structure. These graphs are trimmed. \n", | |||
" 137 119.5 MiB 0.0 MiB if len(rtn_data) == 3:\n", | |||
" 138 119.5 MiB 0.0 MiB idx_trim = rtn_data[2] # the index of trimmed graph list\n", | |||
" 139 119.5 MiB 0.0 MiB y = [y[idxt] for idxt in idx_trim] # trim y accordingly\n", | |||
" 140 # Kmatrix = np.random.rand(2250, 2250)\n", | |||
" 141 # current_run_time = 0.1\n", | |||
" 142 \n", | |||
" 143 # remove graphs whose kernels with themselves are zeros\n", | |||
" 144 119.5 MiB 0.0 MiB Kmatrix_diag = Kmatrix.diagonal().copy()\n", | |||
" 145 119.5 MiB 0.0 MiB nb_g_ignore = 0\n", | |||
" 146 119.5 MiB 0.0 MiB for idxk, diag in enumerate(Kmatrix_diag):\n", | |||
" 147 119.5 MiB 0.0 MiB if diag == 0:\n", | |||
" 148 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=0)\n", | |||
" 149 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=1)\n", | |||
" 150 nb_g_ignore += 1\n", | |||
" 151 # normalization\n", | |||
" 152 119.5 MiB 0.0 MiB Kmatrix_diag = Kmatrix.diagonal().copy()\n", | |||
" 153 119.5 MiB 0.0 MiB for i in range(len(Kmatrix)):\n", | |||
" 154 119.5 MiB 0.0 MiB for j in range(i, len(Kmatrix)):\n", | |||
" 155 119.5 MiB 0.0 MiB Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])\n", | |||
" 156 119.5 MiB 0.0 MiB Kmatrix[j][i] = Kmatrix[i][j]\n", | |||
" 157 \n", | |||
" 158 119.5 MiB 0.0 MiB print()\n", | |||
" 159 119.5 MiB 0.0 MiB if params_out == {}:\n", | |||
" 160 print('the gram matrix is: ')\n", | |||
" 161 str_fw += 'the gram matrix is:\\n\\n'\n", | |||
" 162 else:\n", | |||
" 163 119.5 MiB 0.0 MiB print('the gram matrix with parameters', params_out, 'is: \\n\\n')\n", | |||
" 164 119.5 MiB 0.0 MiB str_fw += 'the gram matrix with parameters %s is:\\n\\n' % params_out\n", | |||
" 165 119.5 MiB 0.0 MiB if len(Kmatrix) < 2:\n", | |||
" 166 nb_gm_ignore += 1\n", | |||
" 167 print('ignored, as at most only one of all its diagonal value is non-zero.')\n", | |||
" 168 str_fw += 'ignored, as at most only one of all its diagonal value is non-zero.\\n\\n'\n", | |||
" 169 else: \n", | |||
" 170 119.5 MiB 0.0 MiB if np.isnan(Kmatrix).any(\n", | |||
" 171 ): # if the matrix contains elements that are not numbers\n", | |||
" 172 nb_gm_ignore += 1\n", | |||
" 173 print('ignored, as it contains elements that are not numbers.')\n", | |||
" 174 str_fw += 'ignored, as it contains elements that are not numbers.\\n\\n'\n", | |||
" 175 else:\n", | |||
" 176 # print(Kmatrix)\n", | |||
" 177 119.5 MiB 0.0 MiB str_fw += np.array2string(\n", | |||
" 178 119.5 MiB 0.0 MiB Kmatrix,\n", | |||
" 179 119.5 MiB 0.0 MiB separator=',') + '\\n\\n'\n", | |||
" 180 # separator=',',\n", | |||
" 181 # threshold=np.inf,\n", | |||
" 182 # floatmode='unique') + '\\n\\n'\n", | |||
" 183 \n", | |||
" 184 119.5 MiB 0.0 MiB fig_file_name = results_dir + '/GM[ds]' + ds_name\n", | |||
" 185 119.5 MiB 0.0 MiB if params_out != {}:\n", | |||
" 186 119.5 MiB 0.0 MiB fig_file_name += '[params]' + str(idx)\n", | |||
" 187 120.3 MiB 0.7 MiB plt.imshow(Kmatrix)\n", | |||
" 188 120.4 MiB 0.1 MiB plt.colorbar()\n", | |||
" 189 145.3 MiB 24.9 MiB plt.savefig(fig_file_name + '.eps', format='eps', dpi=300)\n", | |||
" 190 # plt.show()\n", | |||
" 191 145.3 MiB 0.0 MiB plt.clf()\n", | |||
" 192 145.3 MiB 0.0 MiB gram_matrices.append(Kmatrix)\n", | |||
" 193 145.3 MiB 0.0 MiB gram_matrix_time.append(current_run_time)\n", | |||
" 194 145.3 MiB 0.0 MiB param_list_pre_revised.append(params_out)\n", | |||
" 195 145.3 MiB 0.0 MiB if nb_g_ignore > 0:\n", | |||
" 196 print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore)\n", | |||
" 197 str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore\n", | |||
" 198 145.3 MiB 0.0 MiB print()\n", | |||
" 199 145.3 MiB 0.0 MiB print(\n", | |||
" 200 145.3 MiB 0.0 MiB '{} gram matrices are calculated, {} of which are ignored.'.format(\n", | |||
" 201 145.3 MiB 0.0 MiB len(param_list_precomputed), nb_gm_ignore))\n", | |||
" 202 145.3 MiB 0.0 MiB str_fw += '{} gram matrices are calculated, {} of which are ignored.\\n\\n'.format(len(param_list_precomputed), nb_gm_ignore)\n", | |||
" 203 145.3 MiB 0.0 MiB str_fw += 'serial numbers of gram matrix figures and their corresponding parameters settings:\\n\\n'\n", | |||
" 204 145.3 MiB 0.0 MiB str_fw += ''.join([\n", | |||
" 205 145.3 MiB 0.0 MiB '{}: {}\\n'.format(idx, params_out)\n", | |||
" 206 145.3 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed)\n", | |||
" 207 ])\n", | |||
" 208 \n", | |||
" 209 145.3 MiB 0.0 MiB print()\n", | |||
" 210 145.3 MiB 0.0 MiB if len(gram_matrices) == 0:\n", | |||
" 211 print('all gram matrices are ignored, no results obtained.')\n", | |||
" 212 str_fw += '\\nall gram matrices are ignored, no results obtained.\\n\\n'\n", | |||
" 213 else:\n", | |||
" 214 # save gram matrices to file.\n", | |||
" 215 145.4 MiB 0.1 MiB np.savez(results_dir + '/' + ds_name + '.gm', \n", | |||
" 216 145.4 MiB 0.0 MiB gms=gram_matrices, params=param_list_pre_revised, y=y, \n", | |||
" 217 145.4 MiB 0.0 MiB gmtime=gram_matrix_time)\n", | |||
" 218 \n", | |||
" 219 145.4 MiB 0.0 MiB print(\n", | |||
" 220 145.4 MiB 0.0 MiB '3. Fitting and predicting using nested cross validation. This could really take a while...'\n", | |||
" 221 )\n", | |||
" 222 \n", | |||
" 223 # ---- use pool.imap_unordered to parallel and track progress. ----\n", | |||
" 224 # train_pref = []\n", | |||
" 225 # val_pref = []\n", | |||
" 226 # test_pref = []\n", | |||
" 227 # def func_assign(result, var_to_assign):\n", | |||
" 228 # for idx, itm in enumerate(var_to_assign):\n", | |||
" 229 # itm.append(result[idx]) \n", | |||
" 230 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 231 # \n", | |||
" 232 # parallel_me(trial_do_partial, range(NUM_TRIALS), func_assign, \n", | |||
" 233 # [train_pref, val_pref, test_pref], glbv=gram_matrices,\n", | |||
" 234 # method='imap_unordered', n_jobs=n_jobs, chunksize=1,\n", | |||
" 235 # itr_desc='cross validation')\n", | |||
" 236 \n", | |||
" 237 145.4 MiB 0.0 MiB def init_worker(gms_toshare):\n", | |||
" 238 global G_gms\n", | |||
" 239 G_gms = gms_toshare\n", | |||
" 240 \n", | |||
" 241 # gram_matrices = np.array(gram_matrices)\n", | |||
" 242 # gms_shape = gram_matrices.shape\n", | |||
" 243 # gms_array = Array('d', np.reshape(gram_matrices.copy(), -1, order='C'))\n", | |||
" 244 # pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gms_array, gms_shape))\n", | |||
" 245 145.4 MiB 0.0 MiB pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n", | |||
" 246 145.4 MiB 0.0 MiB trial_do_partial = partial(parallel_trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 247 145.4 MiB 0.0 MiB train_pref = []\n", | |||
" 248 145.4 MiB 0.0 MiB val_pref = []\n", | |||
" 249 145.4 MiB 0.0 MiB test_pref = []\n", | |||
" 250 # if NUM_TRIALS < 1000 * n_jobs:\n", | |||
" 251 # chunksize = int(NUM_TRIALS / n_jobs) + 1\n", | |||
" 252 # else:\n", | |||
" 253 # chunksize = 1000\n", | |||
" 254 145.4 MiB 0.0 MiB chunksize = 1\n", | |||
" 255 145.4 MiB 0.0 MiB for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n", | |||
" 256 145.4 MiB 0.0 MiB train_pref.append(o1)\n", | |||
" 257 145.4 MiB 0.0 MiB val_pref.append(o2)\n", | |||
" 258 145.4 MiB 0.0 MiB test_pref.append(o3)\n", | |||
" 259 145.4 MiB 0.0 MiB pool.close()\n", | |||
" 260 145.4 MiB 0.0 MiB pool.join()\n", | |||
" 261 \n", | |||
" 262 # # ---- use pool.map to parallel. ----\n", | |||
" 263 # pool = Pool(n_jobs)\n", | |||
" 264 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y[0:250], model_type)\n", | |||
" 265 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n", | |||
" 266 # train_pref = [item[0] for item in result_perf]\n", | |||
" 267 # val_pref = [item[1] for item in result_perf]\n", | |||
" 268 # test_pref = [item[2] for item in result_perf]\n", | |||
" 269 \n", | |||
" 270 # # ---- direct running, normally use a single CPU core. ----\n", | |||
" 271 # train_pref = []\n", | |||
" 272 # val_pref = []\n", | |||
" 273 # test_pref = []\n", | |||
" 274 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n", | |||
" 275 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n", | |||
" 276 # train_pref.append(o1)\n", | |||
" 277 # val_pref.append(o2)\n", | |||
" 278 # test_pref.append(o3)\n", | |||
" 279 # print()\n", | |||
" 280 \n", | |||
" 281 145.4 MiB 0.0 MiB print()\n", | |||
" 282 145.4 MiB 0.0 MiB print('4. Getting final performance...')\n", | |||
" 283 145.4 MiB 0.0 MiB str_fw += '\\nIII. Performance.\\n\\n'\n", | |||
" 284 # averages and confidences of performances on outer trials for each combination of parameters\n", | |||
" 285 145.4 MiB 0.0 MiB average_train_scores = np.mean(train_pref, axis=0)\n", | |||
" 286 # print('val_pref: ', val_pref[0][0])\n", | |||
" 287 145.4 MiB 0.0 MiB average_val_scores = np.mean(val_pref, axis=0)\n", | |||
" 288 # print('test_pref: ', test_pref[0][0])\n", | |||
" 289 145.4 MiB 0.0 MiB average_perf_scores = np.mean(test_pref, axis=0)\n", | |||
" 290 # sample std is used here\n", | |||
" 291 145.4 MiB 0.0 MiB std_train_scores = np.std(train_pref, axis=0, ddof=1)\n", | |||
" 292 145.4 MiB 0.0 MiB std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", | |||
" 293 145.4 MiB 0.0 MiB std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", | |||
" 294 \n", | |||
" 295 145.4 MiB 0.0 MiB if model_type == 'regression':\n", | |||
" 296 145.4 MiB 0.0 MiB best_val_perf = np.amin(average_val_scores)\n", | |||
" 297 else:\n", | |||
" 298 best_val_perf = np.amax(average_val_scores)\n", | |||
" 299 # print('average_val_scores: ', average_val_scores)\n", | |||
" 300 # print('best_val_perf: ', best_val_perf)\n", | |||
" 301 # print()\n", | |||
" 302 145.4 MiB 0.0 MiB best_params_index = np.where(average_val_scores == best_val_perf)\n", | |||
" 303 # find smallest val std with best val perf.\n", | |||
" 304 best_val_stds = [\n", | |||
" 305 145.4 MiB 0.0 MiB std_val_scores[value][best_params_index[1][idx]]\n", | |||
" 306 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 307 ]\n", | |||
" 308 145.4 MiB 0.0 MiB min_val_std = np.amin(best_val_stds)\n", | |||
" 309 145.4 MiB 0.0 MiB best_params_index = np.where(std_val_scores == min_val_std)\n", | |||
" 310 best_params_out = [\n", | |||
" 311 145.4 MiB 0.0 MiB param_list_pre_revised[i] for i in best_params_index[0]\n", | |||
" 312 ]\n", | |||
" 313 145.4 MiB 0.0 MiB best_params_in = [param_list[i] for i in best_params_index[1]]\n", | |||
" 314 145.4 MiB 0.0 MiB print('best_params_out: ', best_params_out)\n", | |||
" 315 145.4 MiB 0.0 MiB print('best_params_in: ', best_params_in)\n", | |||
" 316 145.4 MiB 0.0 MiB print()\n", | |||
" 317 145.4 MiB 0.0 MiB print('best_val_perf: ', best_val_perf)\n", | |||
" 318 145.4 MiB 0.0 MiB print('best_val_std: ', min_val_std)\n", | |||
" 319 145.4 MiB 0.0 MiB str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n", | |||
" 320 145.4 MiB 0.0 MiB str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n", | |||
" 321 145.4 MiB 0.0 MiB str_fw += 'best_val_perf: %s\\n' % best_val_perf\n", | |||
" 322 145.4 MiB 0.0 MiB str_fw += 'best_val_std: %s\\n' % min_val_std\n", | |||
" 323 \n", | |||
" 324 # print(best_params_index)\n", | |||
" 325 # print(best_params_index[0])\n", | |||
" 326 # print(average_perf_scores)\n", | |||
" 327 final_performance = [\n", | |||
" 328 145.4 MiB 0.0 MiB average_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 329 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 330 ]\n", | |||
" 331 final_confidence = [\n", | |||
" 332 145.4 MiB 0.0 MiB std_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 333 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 334 ]\n", | |||
" 335 145.4 MiB 0.0 MiB print('final_performance: ', final_performance)\n", | |||
" 336 145.4 MiB 0.0 MiB print('final_confidence: ', final_confidence)\n", | |||
" 337 145.4 MiB 0.0 MiB str_fw += 'final_performance: %s\\n' % final_performance\n", | |||
" 338 145.4 MiB 0.0 MiB str_fw += 'final_confidence: %s\\n' % final_confidence\n", | |||
" 339 train_performance = [\n", | |||
" 340 145.4 MiB 0.0 MiB average_train_scores[value][best_params_index[1][idx]]\n", | |||
" 341 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 342 ]\n", | |||
" 343 train_std = [\n", | |||
" 344 145.4 MiB 0.0 MiB std_train_scores[value][best_params_index[1][idx]]\n", | |||
" 345 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 346 ]\n", | |||
" 347 145.4 MiB 0.0 MiB print('train_performance: %s' % train_performance)\n", | |||
" 348 145.4 MiB 0.0 MiB print('train_std: ', train_std)\n", | |||
" 349 145.4 MiB 0.0 MiB str_fw += 'train_performance: %s\\n' % train_performance\n", | |||
" 350 145.4 MiB 0.0 MiB str_fw += 'train_std: %s\\n\\n' % train_std\n", | |||
" 351 \n", | |||
" 352 145.4 MiB 0.0 MiB print()\n", | |||
" 353 145.4 MiB 0.0 MiB tt_total = time.time() - tts # training time for all hyper-parameters\n", | |||
" 354 145.4 MiB 0.0 MiB average_gram_matrix_time = np.mean(gram_matrix_time)\n", | |||
" 355 145.4 MiB 0.0 MiB std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n", | |||
" 356 best_gram_matrix_time = [\n", | |||
" 357 145.4 MiB 0.0 MiB gram_matrix_time[i] for i in best_params_index[0]\n", | |||
" 358 ]\n", | |||
" 359 145.4 MiB 0.0 MiB ave_bgmt = np.mean(best_gram_matrix_time)\n", | |||
" 360 145.4 MiB 0.0 MiB std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n", | |||
" 361 145.4 MiB 0.0 MiB print(\n", | |||
" 362 145.4 MiB 0.0 MiB 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n", | |||
" 363 145.4 MiB 0.0 MiB .format(average_gram_matrix_time, std_gram_matrix_time))\n", | |||
" 364 145.4 MiB 0.0 MiB print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n", | |||
" 365 145.4 MiB 0.0 MiB ave_bgmt, std_bgmt))\n", | |||
" 366 145.4 MiB 0.0 MiB print(\n", | |||
" 367 145.4 MiB 0.0 MiB 'total training time with all hyper-param choices: {:.2f}s'.format(\n", | |||
" 368 145.4 MiB 0.0 MiB tt_total))\n", | |||
" 369 145.4 MiB 0.0 MiB str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n", | |||
" 370 145.4 MiB 0.0 MiB str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n", | |||
" 371 145.4 MiB 0.0 MiB str_fw += 'total training time with all hyper-param choices: {:.2f}s\\n\\n'.format(tt_total)\n", | |||
" 372 \n", | |||
" 373 # # save results to file\n", | |||
" 374 # np.savetxt(results_name_pre + 'average_train_scores.dt',\n", | |||
" 375 # average_train_scores)\n", | |||
" 376 # np.savetxt(results_name_pre + 'average_val_scores', average_val_scores)\n", | |||
" 377 # np.savetxt(results_name_pre + 'average_perf_scores.dt',\n", | |||
" 378 # average_perf_scores)\n", | |||
" 379 # np.savetxt(results_name_pre + 'std_train_scores.dt', std_train_scores)\n", | |||
" 380 # np.savetxt(results_name_pre + 'std_val_scores.dt', std_val_scores)\n", | |||
" 381 # np.savetxt(results_name_pre + 'std_perf_scores.dt', std_perf_scores)\n", | |||
" 382 \n", | |||
" 383 # np.save(results_name_pre + 'best_params_index', best_params_index)\n", | |||
" 384 # np.save(results_name_pre + 'best_params_pre.dt', best_params_out)\n", | |||
" 385 # np.save(results_name_pre + 'best_params_in.dt', best_params_in)\n", | |||
" 386 # np.save(results_name_pre + 'best_val_perf.dt', best_val_perf)\n", | |||
" 387 # np.save(results_name_pre + 'best_val_std.dt', best_val_std)\n", | |||
" 388 # np.save(results_name_pre + 'final_performance.dt', final_performance)\n", | |||
" 389 # np.save(results_name_pre + 'final_confidence.dt', final_confidence)\n", | |||
" 390 # np.save(results_name_pre + 'train_performance.dt', train_performance)\n", | |||
" 391 # np.save(results_name_pre + 'train_std.dt', train_std)\n", | |||
" 392 \n", | |||
" 393 # np.save(results_name_pre + 'gram_matrix_time.dt', gram_matrix_time)\n", | |||
" 394 # np.save(results_name_pre + 'average_gram_matrix_time.dt',\n", | |||
" 395 # average_gram_matrix_time)\n", | |||
" 396 # np.save(results_name_pre + 'std_gram_matrix_time.dt',\n", | |||
" 397 # std_gram_matrix_time)\n", | |||
" 398 # np.save(results_name_pre + 'best_gram_matrix_time.dt',\n", | |||
" 399 # best_gram_matrix_time)\n", | |||
" 400 \n", | |||
" 401 # print out as table.\n", | |||
" 402 145.4 MiB 0.0 MiB from collections import OrderedDict\n", | |||
" 403 145.4 MiB 0.0 MiB from tabulate import tabulate\n", | |||
" 404 145.4 MiB 0.0 MiB table_dict = {}\n", | |||
" 405 145.4 MiB 0.0 MiB if model_type == 'regression':\n", | |||
" 406 145.6 MiB 0.0 MiB for param_in in param_list:\n", | |||
" 407 145.6 MiB 0.2 MiB param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n", | |||
" 408 else:\n", | |||
" 409 for param_in in param_list:\n", | |||
" 410 param_in['C'] = '{:.2e}'.format(param_in['C'])\n", | |||
" 411 145.6 MiB 0.0 MiB table_dict['params'] = [{**param_out, **param_in}\n", | |||
" 412 145.6 MiB 0.0 MiB for param_in in param_list for param_out in param_list_pre_revised]\n", | |||
" 413 table_dict['gram_matrix_time'] = [\n", | |||
" 414 145.6 MiB 0.0 MiB '{:.2f}'.format(gram_matrix_time[index_out])\n", | |||
" 415 145.6 MiB 0.0 MiB for param_in in param_list\n", | |||
" 416 145.6 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 417 ]\n", | |||
" 418 table_dict['valid_perf'] = [\n", | |||
" 419 145.6 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n", | |||
" 420 std_val_scores[index_out][index_in])\n", | |||
" 421 145.6 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 422 145.6 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 423 ]\n", | |||
" 424 table_dict['test_perf'] = [\n", | |||
" 425 145.6 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n", | |||
" 426 std_perf_scores[index_out][index_in])\n", | |||
" 427 145.6 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 428 145.6 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 429 ]\n", | |||
" 430 table_dict['train_perf'] = [\n", | |||
" 431 145.6 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n", | |||
" 432 std_train_scores[index_out][index_in])\n", | |||
" 433 145.6 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 434 145.6 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 435 ]\n", | |||
" 436 keyorder = [\n", | |||
" 437 145.6 MiB 0.0 MiB 'params', 'train_perf', 'valid_perf', 'test_perf',\n", | |||
" 438 145.6 MiB 0.0 MiB 'gram_matrix_time'\n", | |||
" 439 ]\n", | |||
" 440 145.6 MiB 0.0 MiB print()\n", | |||
" 441 145.6 MiB 0.0 MiB tb_print = tabulate(\n", | |||
" 442 145.6 MiB 0.0 MiB OrderedDict(\n", | |||
" 443 145.6 MiB 0.0 MiB sorted(table_dict.items(),\n", | |||
" 444 145.6 MiB 0.0 MiB key=lambda i: keyorder.index(i[0]))),\n", | |||
" 445 145.6 MiB 0.0 MiB headers='keys')\n", | |||
" 446 # print(tb_print)\n", | |||
" 447 145.6 MiB 0.0 MiB str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n", | |||
" 448 \n", | |||
" 449 # read gram matrices from file.\n", | |||
" 450 else: \n", | |||
" 451 # Grid of parameters with a discrete number of values for each.\n", | |||
" 452 # param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", | |||
" 453 param_list = list(ParameterGrid(param_grid))\n", | |||
" 454 \n", | |||
" 455 # read gram matrices from file.\n", | |||
" 456 print()\n", | |||
" 457 print('2. Reading gram matrices from file...')\n", | |||
" 458 str_fw += '\\nII. Gram matrices.\\n\\nGram matrices are read from file, see last log for detail.\\n'\n", | |||
" 459 gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')\n", | |||
" 460 gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed\n", | |||
" 461 gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices\n", | |||
" 462 param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones\n", | |||
" 463 y = gmfile['y'].tolist()\n", | |||
" 464 \n", | |||
" 465 tts = time.time() # start training time\n", | |||
" 466 # nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN) \n", | |||
" 467 print(\n", | |||
" 468 '3. Fitting and predicting using nested cross validation. This could really take a while...'\n", | |||
" 469 )\n", | |||
" 470 \n", | |||
" 471 # ---- use pool.imap_unordered to parallel and track progress. ----\n", | |||
" 472 def init_worker(gms_toshare):\n", | |||
" 473 global G_gms\n", | |||
" 474 G_gms = gms_toshare\n", | |||
" 475 \n", | |||
" 476 pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n", | |||
" 477 trial_do_partial = partial(parallel_trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 478 train_pref = []\n", | |||
" 479 val_pref = []\n", | |||
" 480 test_pref = []\n", | |||
" 481 chunksize = 1\n", | |||
" 482 for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n", | |||
" 483 train_pref.append(o1)\n", | |||
" 484 val_pref.append(o2)\n", | |||
" 485 test_pref.append(o3)\n", | |||
" 486 pool.close()\n", | |||
" 487 pool.join()\n", | |||
" 488 \n", | |||
" 489 # # ---- use pool.map to parallel. ----\n", | |||
" 490 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n", | |||
" 491 # train_pref = [item[0] for item in result_perf]\n", | |||
" 492 # val_pref = [item[1] for item in result_perf]\n", | |||
" 493 # test_pref = [item[2] for item in result_perf]\n", | |||
" 494 \n", | |||
" 495 # # ---- use joblib.Parallel to parallel and track progress. ----\n", | |||
" 496 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)\n", | |||
" 497 # result_perf = Parallel(n_jobs=n_jobs, verbose=10)(delayed(trial_do_partial)(trial) for trial in range(NUM_TRIALS))\n", | |||
" 498 # train_pref = [item[0] for item in result_perf]\n", | |||
" 499 # val_pref = [item[1] for item in result_perf]\n", | |||
" 500 # test_pref = [item[2] for item in result_perf]\n", | |||
" 501 \n", | |||
" 502 # # ---- direct running, normally use a single CPU core. ----\n", | |||
" 503 # train_pref = []\n", | |||
" 504 # val_pref = []\n", | |||
" 505 # test_pref = []\n", | |||
" 506 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n", | |||
" 507 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n", | |||
" 508 # train_pref.append(o1)\n", | |||
" 509 # val_pref.append(o2)\n", | |||
" 510 # test_pref.append(o3)\n", | |||
" 511 \n", | |||
" 512 print()\n", | |||
" 513 print('4. Getting final performance...')\n", | |||
" 514 str_fw += '\\nIII. Performance.\\n\\n'\n", | |||
" 515 # averages and confidences of performances on outer trials for each combination of parameters\n", | |||
" 516 average_train_scores = np.mean(train_pref, axis=0)\n", | |||
" 517 average_val_scores = np.mean(val_pref, axis=0)\n", | |||
" 518 average_perf_scores = np.mean(test_pref, axis=0)\n", | |||
" 519 # sample std is used here\n", | |||
" 520 std_train_scores = np.std(train_pref, axis=0, ddof=1)\n", | |||
" 521 std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", | |||
" 522 std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", | |||
" 523 \n", | |||
" 524 if model_type == 'regression':\n", | |||
" 525 best_val_perf = np.amin(average_val_scores)\n", | |||
" 526 else:\n", | |||
" 527 best_val_perf = np.amax(average_val_scores)\n", | |||
" 528 best_params_index = np.where(average_val_scores == best_val_perf)\n", | |||
" 529 # find smallest val std with best val perf.\n", | |||
" 530 best_val_stds = [\n", | |||
" 531 std_val_scores[value][best_params_index[1][idx]]\n", | |||
" 532 for idx, value in enumerate(best_params_index[0])\n", | |||
" 533 ]\n", | |||
" 534 min_val_std = np.amin(best_val_stds)\n", | |||
" 535 best_params_index = np.where(std_val_scores == min_val_std)\n", | |||
" 536 best_params_out = [\n", | |||
" 537 param_list_pre_revised[i] for i in best_params_index[0]\n", | |||
" 538 ]\n", | |||
" 539 best_params_in = [param_list[i] for i in best_params_index[1]]\n", | |||
" 540 print('best_params_out: ', best_params_out)\n", | |||
" 541 print('best_params_in: ', best_params_in)\n", | |||
" 542 print()\n", | |||
" 543 print('best_val_perf: ', best_val_perf)\n", | |||
" 544 print('best_val_std: ', min_val_std)\n", | |||
" 545 str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n", | |||
" 546 str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n", | |||
" 547 str_fw += 'best_val_perf: %s\\n' % best_val_perf\n", | |||
" 548 str_fw += 'best_val_std: %s\\n' % min_val_std\n", | |||
" 549 \n", | |||
" 550 final_performance = [\n", | |||
" 551 average_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 552 for idx, value in enumerate(best_params_index[0])\n", | |||
" 553 ]\n", | |||
" 554 final_confidence = [\n", | |||
" 555 std_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 556 for idx, value in enumerate(best_params_index[0])\n", | |||
" 557 ]\n", | |||
" 558 print('final_performance: ', final_performance)\n", | |||
" 559 print('final_confidence: ', final_confidence)\n", | |||
" 560 str_fw += 'final_performance: %s\\n' % final_performance\n", | |||
" 561 str_fw += 'final_confidence: %s\\n' % final_confidence\n", | |||
" 562 train_performance = [\n", | |||
" 563 average_train_scores[value][best_params_index[1][idx]]\n", | |||
" 564 for idx, value in enumerate(best_params_index[0])\n", | |||
" 565 ]\n", | |||
" 566 train_std = [\n", | |||
" 567 std_train_scores[value][best_params_index[1][idx]]\n", | |||
" 568 for idx, value in enumerate(best_params_index[0])\n", | |||
" 569 ]\n", | |||
" 570 print('train_performance: %s' % train_performance)\n", | |||
" 571 print('train_std: ', train_std)\n", | |||
" 572 str_fw += 'train_performance: %s\\n' % train_performance\n", | |||
" 573 str_fw += 'train_std: %s\\n\\n' % train_std\n", | |||
" 574 \n", | |||
" 575 print()\n", | |||
" 576 average_gram_matrix_time = np.mean(gram_matrix_time)\n", | |||
" 577 std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n", | |||
" 578 best_gram_matrix_time = [\n", | |||
" 579 gram_matrix_time[i] for i in best_params_index[0]\n", | |||
" 580 ]\n", | |||
" 581 ave_bgmt = np.mean(best_gram_matrix_time)\n", | |||
" 582 std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n", | |||
" 583 print(\n", | |||
" 584 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n", | |||
" 585 .format(average_gram_matrix_time, std_gram_matrix_time))\n", | |||
" 586 print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n", | |||
" 587 ave_bgmt, std_bgmt))\n", | |||
" 588 tt_poster = time.time() - tts # training time with hyper-param choices who did not participate in calculation of gram matrices\n", | |||
" 589 print(\n", | |||
" 590 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s'.format(\n", | |||
" 591 tt_poster))\n", | |||
" 592 print('total training time with all hyper-param choices: {:.2f}s'.format(\n", | |||
" 593 tt_poster + np.sum(gram_matrix_time)))\n", | |||
" 594 # str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n", | |||
" 595 # str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n", | |||
" 596 str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\\n\\n'.format(tt_poster)\n", | |||
" 597 \n", | |||
" 598 # print out as table.\n", | |||
" 599 from collections import OrderedDict\n", | |||
" 600 from tabulate import tabulate\n", | |||
" 601 table_dict = {}\n", | |||
" 602 if model_type == 'regression':\n", | |||
" 603 for param_in in param_list:\n", | |||
" 604 param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n", | |||
" 605 else:\n", | |||
" 606 for param_in in param_list:\n", | |||
" 607 param_in['C'] = '{:.2e}'.format(param_in['C'])\n", | |||
" 608 table_dict['params'] = [{**param_out, **param_in}\n", | |||
" 609 for param_in in param_list for param_out in param_list_pre_revised]\n", | |||
" 610 # table_dict['gram_matrix_time'] = [\n", | |||
" 611 # '{:.2f}'.format(gram_matrix_time[index_out])\n", | |||
" 612 # for param_in in param_list\n", | |||
" 613 # for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 614 # ]\n", | |||
" 615 table_dict['valid_perf'] = [\n", | |||
" 616 '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n", | |||
" 617 std_val_scores[index_out][index_in])\n", | |||
" 618 for index_in, _ in enumerate(param_list)\n", | |||
" 619 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 620 ]\n", | |||
" 621 table_dict['test_perf'] = [\n", | |||
" 622 '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n", | |||
" 623 std_perf_scores[index_out][index_in])\n", | |||
" 624 for index_in, _ in enumerate(param_list)\n", | |||
" 625 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 626 ]\n", | |||
" 627 table_dict['train_perf'] = [\n", | |||
" 628 '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n", | |||
" 629 std_train_scores[index_out][index_in])\n", | |||
" 630 for index_in, _ in enumerate(param_list)\n", | |||
" 631 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 632 ]\n", | |||
" 633 keyorder = [\n", | |||
" 634 'params', 'train_perf', 'valid_perf', 'test_perf'\n", | |||
" 635 ]\n", | |||
" 636 print()\n", | |||
" 637 tb_print = tabulate(\n", | |||
" 638 OrderedDict(\n", | |||
" 639 sorted(table_dict.items(),\n", | |||
" 640 key=lambda i: keyorder.index(i[0]))),\n", | |||
" 641 headers='keys')\n", | |||
" 642 # print(tb_print)\n", | |||
" 643 str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n", | |||
" 644 \n", | |||
" 645 # open file to save all results for this dataset.\n", | |||
" 646 if not os.path.exists(results_dir):\n", | |||
" 647 os.makedirs(results_dir)\n", | |||
" 648 \n", | |||
" 649 # open file to save all results for this dataset.\n", | |||
" 650 145.6 MiB 0.0 MiB if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):\n", | |||
" 651 with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:\n", | |||
" 652 f.write(str_fw)\n", | |||
" 653 else:\n", | |||
" 654 145.6 MiB 0.0 MiB with open(results_dir + '/' + ds_name + '.output.txt', 'r+') as f:\n", | |||
" 655 145.6 MiB 0.0 MiB content = f.read()\n", | |||
" 656 145.6 MiB 0.0 MiB f.seek(0, 0)\n", | |||
" 657 145.6 MiB 0.0 MiB f.write(str_fw + '\\n\\n\\n' + content)\n", | |||
"\n", | |||
"\n", | |||
"\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"import functools\n", | |||
"import sys\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"sys.path.insert(0, \"../../\")\n", | |||
"from libs import *\n", | |||
"import multiprocessing\n", | |||
"\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
"#from pygraph.utils.model_selection_precomputed import trial_do\n", | |||
"\n", | |||
"dslist = [\n", | |||
" {'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds',\n", | |||
" 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
"# 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
"# # contains single node graph, node symb\n", | |||
"# {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
"# {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
"# {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
"# {'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
"# # node nsymb\n", | |||
"# {'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
"# # node symb/nsymb\n", | |||
"# {'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
"# # node/edge symb\n", | |||
"# {'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
"\n", | |||
" # {'name': 'COIL-DEL', 'dataset': '../../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
" # # # {'name': 'BZR', 'dataset': '../../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # # {'name': 'COX2', 'dataset': '../../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # {'name': 'Fingerprint', 'dataset': '../../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
" #\n", | |||
" # # {'name': 'DHFR', 'dataset': '../../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'SYNTHETIC', 'dataset': '../../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'MSRC9', 'dataset': '../../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
" # # {'name': 'MSRC21', 'dataset': '../../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
" # # {'name': 'FIRSTMM_DB', 'dataset': '../../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"\n", | |||
" # # {'name': 'PROTEINS', 'dataset': '../../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'PROTEINS_full', 'dataset': '../../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'AIDS', 'dataset': '../../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
" # {'name': 'NCI1', 'dataset': '../../datasets/NCI1/NCI1.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI109', 'dataset': '../../datasets/NCI109/NCI109.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI-HIV', 'dataset': '../../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" # 'dataset_y': '../../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
"\n", | |||
" # # not working below\n", | |||
" # {'name': 'PTC_FM', 'dataset': '../../datasets/PTC/Train/FM.ds',},\n", | |||
" # {'name': 'PTC_FR', 'dataset': '../../datasets/PTC/Train/FR.ds',},\n", | |||
" # {'name': 'PTC_MM', 'dataset': '../../datasets/PTC/Train/MM.ds',},\n", | |||
" # {'name': 'PTC_MR', 'dataset': '../../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"estimator = spkernel\n", | |||
"mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n", | |||
"param_grid_precomputed = {'node_kernels': [\n", | |||
" {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
"\n", | |||
"for ds in dslist:\n", | |||
" print()\n", | |||
" print(ds['name'])\n", | |||
" model_selection_for_precomputed_kernel(\n", | |||
" ds['dataset'],\n", | |||
" estimator,\n", | |||
" param_grid_precomputed,\n", | |||
" (param_grid[1] if ('task' in ds and ds['task']\n", | |||
" == 'regression') else param_grid[0]),\n", | |||
" (ds['task'] if 'task' in ds else 'classification'),\n", | |||
" NUM_TRIALS=30,\n", | |||
" datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n", | |||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | |||
" ds_name=ds['name'],\n", | |||
" n_jobs=multiprocessing.cpu_count(),\n", | |||
" read_gm_from_file=False)\n", | |||
" print()" | |||
] | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.7" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 2 | |||
} |
@@ -16,12 +16,13 @@ from tqdm import tqdm | |||
import networkx as nx | |||
import numpy as np | |||
import functools | |||
from libs import * | |||
#import multiprocessing | |||
from matplotlib import pyplot as plt | |||
from sklearn.model_selection import ParameterGrid | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from libs import * | |||
from pygraph.utils.utils import getSPGraph, direct_product | |||
from pygraph.utils.graphdataset import get_dataset_attributes | |||
from pygraph.utils.graphfiles import loadDataset | |||
@@ -605,20 +606,20 @@ def compute_gram_matrices(datafile, | |||
dslist = [ | |||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
{'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds', | |||
'task': 'regression'}, # node symb | |||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||
{'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
{'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'}, | |||
# node symb/nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node/edge symb | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | |||
# {'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# {'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat', | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | |||
] | |||
@@ -677,12 +678,12 @@ for idx1, ds in enumerate(dslist): | |||
print() | |||
print(gmtmat[idx1, :]) | |||
np.save('test_parallel/' + estimator.__name__ + '.' + ds['name'] + '_' + | |||
np.save('../test_parallel/' + estimator.__name__ + '.' + ds['name'] + '_' + | |||
str(idx1), gmtmat[idx1, :]) | |||
p = ax.plot(chunklist, gmtmat[idx1, :], '.-', label=ds['name'], zorder=3) | |||
ax.legend(loc='upper right', ncol=3, labelspacing=0.1, handletextpad=0.4, | |||
columnspacing=0.6) | |||
plt.savefig('test_parallel/' + estimator.__name__ + str(idx1) + '_' + | |||
plt.savefig('../test_parallel/' + estimator.__name__ + str(idx1) + '_' + | |||
str(cpus) + '.eps', format='eps', dpi=300) | |||
# plt.show() |
@@ -1,7 +1,7 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Test gram matrices. | |||
Check basic properties of gram matrices. | |||
Created on Wed Sep 19 15:32:29 2018 | |||
@author: ljia | |||
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt | |||
from numpy.linalg import eig | |||
# read gram matrices from file. | |||
results_dir = 'results/marginalizedkernel/myria' | |||
results_dir = '../results/marginalizedkernel/myria' | |||
ds_name = 'ENZYMES' | |||
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | |||
#print('gm time: ', gmfile['gmtime']) | |||
@@ -27,7 +27,7 @@ for idx, x in enumerate(gram_matrices): | |||
print(idx) | |||
plt.imshow(x) | |||
plt.colorbar() | |||
plt.savefig('check_gm/' + ds_name + '.gm.eps', format='eps', dpi=300) | |||
plt.savefig('../check_gm/' + ds_name + '.gm.eps', format='eps', dpi=300) | |||
# print(np.transpose(x)) | |||
print('if symmetric: ', np.array_equal(x, np.transpose(x))) | |||
@@ -45,4 +45,4 @@ for idx, x in enumerate(gram_matrices): | |||
[lamnda, v] = eig(x) | |||
print('min, max lambda: ', min(lamnda), max(lamnda)) | |||
if -1e-10 > min(lamnda): | |||
raise Exception('wrong eigen values.') | |||
raise Exception('wrong eigen values.') |
@@ -16,7 +16,7 @@ Author : Sandro Vega-Pons, Emanuele Olivetti | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
import numpy as np | |||
import networkx as nx | |||
from pygraph.utils.graphfiles import loadDataset | |||
@@ -113,15 +113,15 @@ class GK_SP: | |||
ds_name = 'PAH' | |||
datafile = '../datasets/PAH/dataset.ds' | |||
datafile = '../../datasets/PAH/dataset.ds' | |||
dataset, y = loadDataset(datafile, filename_y=None, extra_params=None) | |||
gk_sp = GK_SP() | |||
x = gk_sp.compare_list(dataset) | |||
np.savez('check_gm/' + ds_name + '.gm.jstsp', gms=x) | |||
np.savez('../check_gm/' + ds_name + '.gm.jstsp', gms=x) | |||
plt.imshow(x) | |||
plt.colorbar() | |||
plt.savefig('check_gm/' + ds_name + '.gm.jstsp.eps', format='eps', dpi=300) | |||
plt.savefig('../check_gm/' + ds_name + '.gm.jstsp.eps', format='eps', dpi=300) | |||
# print(np.transpose(x)) | |||
print('if symmetric: ', np.array_equal(x, np.transpose(x))) | |||
@@ -33,7 +33,7 @@ def idx2chunksize2(idx): | |||
else: | |||
return (idx - 15) * 20000 * 10000 | |||
idx, mrlt, rlt = loadmin('test_parallel/myria/ENZYMES.npy') | |||
idx, mrlt, rlt = loadmin('../test_parallel/myria/ENZYMES.npy') | |||
csize = idx2chunksize2(idx) | |||
#dsize = np.array([183, 150, 68, 94, 188, 2250, 600]) |
@@ -60,5 +60,5 @@ plt.xticks(ind + width / 2, ('Acyclic', 'Alkane', 'MAO', 'PAH', 'MUTAG', 'Letter | |||
ax.set_ylim(bottom=1e-15) | |||
ax.legend((p1[0], p2[0], p3[0], p4[0]), ('min1', 'max1', 'min2', 'max2'), loc='upper right') | |||
plt.savefig('check_gm/compare_eigen_values.eps', format='eps', dpi=300) | |||
plt.savefig('../check_gm/compare_eigen_values.eps', format='eps', dpi=300) | |||
plt.show() |
@@ -81,6 +81,6 @@ ax.yaxis.set_ticks_position('none') | |||
fig.subplots_adjust(right=0.63) | |||
fig.legend(loc='right', ncol=1, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6) | |||
plt.savefig('check_gm/parallel_runtime_on_different_machines.eps', format='eps', dpi=300, | |||
plt.savefig('../check_gm/parallel_runtime_on_different_machines.eps', format='eps', dpi=300, | |||
transparent=True, bbox_inches='tight') | |||
plt.show() |
@@ -39,7 +39,7 @@ | |||
"\n", | |||
"Alkane:\n", | |||
"substructures : {'non linear', 'linear'}\n", | |||
"node_labeled : True\n", | |||
"node_labeled : False\n", | |||
"edge_labeled : False\n", | |||
"is_directed : False\n", | |||
"dataset_size : 150\n", | |||
@@ -55,7 +55,7 @@ | |||
"ave_fill_factor : 0.10199498404299989\n", | |||
"min_fill_factor : 0.0\n", | |||
"max_fill_factor : 0.25\n", | |||
"node_label_num : 2\n", | |||
"node_label_num : 1\n", | |||
"edge_label_num : 1\n", | |||
"node_attr_dim : 0\n", | |||
"edge_attr_dim : 0\n", | |||
@@ -542,8 +542,8 @@ | |||
"edge_attr_dim : 0\n", | |||
"class_number : 2\n", | |||
"\n", | |||
"load SDF: 100%|██████████| 4457424/4457424 [00:08<00:00, 522501.84it/s]\n", | |||
"ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4625.31it/s] \n", | |||
"load SDF: 100%|██████████| 4457424/4457424 [00:08<00:00, 497346.72it/s]\n", | |||
"ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4689.76it/s] \n", | |||
"\n", | |||
"NCI-HIV:\n", | |||
"substructures : {'non linear', 'linear'}\n", | |||
@@ -574,45 +574,45 @@ | |||
], | |||
"source": [ | |||
"import sys\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"sys.path.insert(0, \"../../\")\n", | |||
"from pygraph.utils.graphfiles import loadDataset\n", | |||
"from pygraph.utils.graphdataset import get_dataset_attributes\n", | |||
"\n", | |||
"dslist = [\n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',},\n", | |||
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',\n", | |||
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',},\n", | |||
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',},\n", | |||
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',},\n", | |||
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
" {'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds',},\n", | |||
" {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds',\n", | |||
" 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt',},\n", | |||
" {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds',},\n", | |||
" {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds',},\n", | |||
" {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},\n", | |||
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
" {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
" {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
" {'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" {'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
" {'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
" {'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},\n", | |||
" {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},\n", | |||
" {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n", | |||
" {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n", | |||
" {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},\n", | |||
" {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},\n", | |||
" {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},\n", | |||
" {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},\n", | |||
" {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, \n", | |||
" {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n", | |||
" {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, \n", | |||
" {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
" {'name': 'AIDS', 'dataset': '../../datasets/AIDS/AIDS_A.txt'},\n", | |||
" {'name': 'FIRSTMM_DB', 'dataset': '../../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n", | |||
" {'name': 'MSRC9', 'dataset': '../../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n", | |||
" {'name': 'MSRC21', 'dataset': '../../datasets/MSRC_21_txt/MSRC_21_A.txt'},\n", | |||
" {'name': 'SYNTHETIC', 'dataset': '../../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},\n", | |||
" {'name': 'BZR', 'dataset': '../../datasets/BZR_txt/BZR_A_sparse.txt'},\n", | |||
" {'name': 'COX2', 'dataset': '../../datasets/COX2_txt/COX2_A_sparse.txt'},\n", | |||
" {'name': 'DHFR', 'dataset': '../../datasets/DHFR_txt/DHFR_A_sparse.txt'}, \n", | |||
" {'name': 'PROTEINS', 'dataset': '../../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n", | |||
" {'name': 'PROTEINS_full', 'dataset': '../../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, \n", | |||
" {'name': 'NCI1', 'dataset': '../../datasets/NCI1/NCI1.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n", | |||
" {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
" {'name': 'NCI109', 'dataset': '../../datasets/NCI109/NCI109.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n", | |||
" {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},\n", | |||
" {'name': 'NCI-HIV', 'dataset': '../../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" 'dataset_y': '../../datasets/NCI-HIV/aids_conc_may04.txt',},\n", | |||
"\n", | |||
"# # not working below\n", | |||
"# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
"# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n", | |||
"# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n", | |||
"# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
"# {'name': 'PTC_FM', 'dataset': '../../datasets/PTC/Train/FM.ds',},\n", | |||
"# {'name': 'PTC_FR', 'dataset': '../../datasets/PTC/Train/FR.ds',},\n", | |||
"# {'name': 'PTC_MM', 'dataset': '../../datasets/PTC/Train/MM.ds',},\n", | |||
"# {'name': 'PTC_MR', 'dataset': '../../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"\n", | |||
"for ds in dslist:\n", |
@@ -0,0 +1,62 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Oct 17 16:07:38 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.graphdataset import get_dataset_attributes | |||
dslist = [ | |||
{'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds',}, | |||
{'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', | |||
'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt',}, | |||
{'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds',}, | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds',}, | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, | |||
{'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
{'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, | |||
{'name': 'AIDS', 'dataset': '../../datasets/AIDS/AIDS_A.txt'}, | |||
{'name': 'FIRSTMM_DB', 'dataset': '../../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, | |||
{'name': 'MSRC9', 'dataset': '../../datasets/MSRC_9_txt/MSRC_9_A.txt'}, | |||
{'name': 'MSRC21', 'dataset': '../../datasets/MSRC_21_txt/MSRC_21_A.txt'}, | |||
{'name': 'SYNTHETIC', 'dataset': '../../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, | |||
{'name': 'BZR', 'dataset': '../../datasets/BZR_txt/BZR_A_sparse.txt'}, | |||
{'name': 'COX2', 'dataset': '../../datasets/COX2_txt/COX2_A_sparse.txt'}, | |||
{'name': 'DHFR', 'dataset': '../../datasets/DHFR_txt/DHFR_A_sparse.txt'}, | |||
{'name': 'PROTEINS', 'dataset': '../../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, | |||
{'name': 'PROTEINS_full', 'dataset': '../../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, | |||
{'name': 'NCI1', 'dataset': '../../datasets/NCI1/NCI1.mat', | |||
'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, | |||
{'name': 'NCI109', 'dataset': '../../datasets/NCI109/NCI109.mat', | |||
'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, | |||
{'name': 'NCI-HIV', 'dataset': '../../datasets/NCI-HIV/AIDO99SD.sdf', | |||
'dataset_y': '../../datasets/NCI-HIV/aids_conc_may04.txt',}, | |||
# # not working below | |||
# {'name': 'PTC_FM', 'dataset': '../../datasets/PTC/Train/FM.ds',}, | |||
# {'name': 'PTC_FR', 'dataset': '../../datasets/PTC/Train/FR.ds',}, | |||
# {'name': 'PTC_MM', 'dataset': '../../datasets/PTC/Train/MM.ds',}, | |||
# {'name': 'PTC_MR', 'dataset': '../../datasets/PTC/Train/MR.ds',}, | |||
] | |||
for ds in dslist: | |||
dataset, y = loadDataset( | |||
ds['dataset'], | |||
filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None), | |||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None)) | |||
attrs = get_dataset_attributes( | |||
dataset, target=y, node_label='atom', edge_label='bond_type') | |||
print() | |||
print(ds['name'] + ':') | |||
for atr in attrs: | |||
print(atr, ':', attrs[atr]) | |||
print() |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -19,7 +19,7 @@ from libs import * | |||
import multiprocessing | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -35,7 +35,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -19,7 +19,7 @@ from libs import * | |||
import multiprocessing | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -36,7 +36,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -21,7 +21,7 @@ import functools | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -59,7 +59,7 @@ def run_ms(dataset, y, ds): | |||
'sub_kernel': ['geo', 'exp']} | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -21,7 +21,7 @@ import multiprocessing | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -36,7 +36,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -21,7 +21,7 @@ import multiprocessing | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -38,7 +38,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -19,7 +19,7 @@ from libs import * | |||
import multiprocessing | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -34,7 +34,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -9,7 +9,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -24,12 +24,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -45,7 +45,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -25,12 +25,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -47,7 +47,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -27,12 +27,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -70,7 +70,7 @@ def run_ms(dataset, y, ds): | |||
'sub_kernel': ['geo', 'exp']} | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -27,12 +27,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -47,7 +47,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -32,7 +32,7 @@ dslist = [ | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -49,7 +49,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -25,12 +25,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -45,7 +45,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -0,0 +1,28 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Mar 20 10:12:15 2019 | |||
inferring a graph grom path frequency. | |||
@author: ljia | |||
""" | |||
def SISF(K, v): | |||
if output: | |||
return output | |||
else: | |||
return 'no solution' | |||
def SISF_M(K, v): | |||
return output | |||
def GIPF_tree(K, v): | |||
if K == 1: | |||
pass | |||
if G: | |||
return G | |||
else: | |||
return 'no solution' | |||
def GIPF_M(K, v): | |||
return G |
@@ -139,9 +139,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||
y = [y[idxt] for idxt in idx_trim] # trim y accordingly | |||
# Kmatrix = np.random.rand(2250, 2250) | |||
# current_run_time = 0.1 | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
# remove graphs whose kernels with themselves are zeros | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
nb_g_ignore = 0 | |||
for idxk, diag in enumerate(Kmatrix_diag): | |||
if diag == 0: | |||
@@ -149,6 +149,7 @@ def model_selection_for_precomputed_kernel(datafile, | |||
Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=1) | |||
nb_g_ignore += 1 | |||
# normalization | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
for i in range(len(Kmatrix)): | |||
for j in range(i, len(Kmatrix)): | |||
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||