@@ -1,62 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Oct 17 16:07:38 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.graphdataset import get_dataset_attributes | |||
# Datasets to inspect. Each entry gives a display name and the dataset file
# path; 'dataset_y' (separate targets file) and 'extra_params' (loader
# options, e.g. .mat column layout) are optional per dataset.
dslist = [
    {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds'},
    {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',
     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt'},
    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds'},
    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds'},
    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},
    {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
    {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},
    {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},
    {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},
    {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},
    {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},
    {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},
    {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},
    {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},
    {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},
    {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},
    {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},
    {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
     'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},
    {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
     'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},
    {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
     'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt'},

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]

# Load each dataset and print its computed attributes (node/edge labels,
# graph counts, etc.) as returned by get_dataset_attributes.
for ds in dslist:
    # dict.get returns None for missing optional keys, matching the loader's
    # defaults — idiomatic replacement for the `x if k in d else None` form.
    dataset, y = loadDataset(
        ds['dataset'],
        filename_y=ds.get('dataset_y'),
        extra_params=ds.get('extra_params'))
    attrs = get_dataset_attributes(
        dataset, target=y, node_label='atom', edge_label='bond_type')
    print()
    print(ds['name'] + ':')
    # Iterate key/value pairs directly instead of re-indexing the dict.
    for atr, value in attrs.items():
        print(atr, ':', value)
    print()
@@ -1,815 +0,0 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"Acyclic\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 183it [00:00, 2198.32it/s]\n", | |||
"calculating kernels: 16836it [00:17, 983.99it/s] \n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 183 built in 17.32457208633423 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f63ab934158>, 'nsymb': <function gaussiankernel at 0x7f63ab9987b8>, 'mix': functools.partial(<function kernelproduct at 0x7f63ab951158>, <function deltakernel at 0x7f63ab934158>, <function gaussiankernel at 0x7f63ab9987b8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:12, 2.48it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f63ab934158>, 'nsymb': <function gaussiankernel at 0x7f63ab9987b8>, 'mix': functools.partial(<function kernelproduct at 0x7f63ab951158>, <function deltakernel at 0x7f63ab934158>, <function gaussiankernel at 0x7f63ab9987b8>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 3.1622776601683795e-10}]\n", | |||
"\n", | |||
"best_val_perf: 9.64631220504699\n", | |||
"best_val_std: 0.6555235266552757\n", | |||
"final_performance: [9.306976995404987]\n", | |||
"final_confidence: [2.317244919360123]\n", | |||
"train_performance: [6.190191405968441]\n", | |||
"train_std: [0.21512408952827894]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 17.32±nans\n", | |||
"time to calculate best gram matrix: 17.32±nans\n", | |||
"total training time with all hyper-param choices: 33.16s\n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||
" keepdims=keepdims)\n", | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||
" ret = ret.dtype.type(ret / rcount)\n" | |||
] | |||
}, | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Filename: ../pygraph/utils/model_selection_precomputed.py\n", | |||
"\n", | |||
"Line # Mem usage Increment Line Contents\n", | |||
"================================================\n", | |||
" 24 115.1 MiB 115.1 MiB @profile\n", | |||
" 25 def model_selection_for_precomputed_kernel(datafile,\n", | |||
" 26 estimator,\n", | |||
" 27 param_grid_precomputed,\n", | |||
" 28 param_grid,\n", | |||
" 29 model_type,\n", | |||
" 30 NUM_TRIALS=30,\n", | |||
" 31 datafile_y=None,\n", | |||
" 32 extra_params=None,\n", | |||
" 33 ds_name='ds-unknown',\n", | |||
" 34 n_jobs=1,\n", | |||
" 35 read_gm_from_file=False):\n", | |||
" 36 \"\"\"Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results.\n", | |||
" 37 \n", | |||
" 38 Parameters\n", | |||
" 39 ----------\n", | |||
" 40 datafile : string\n", | |||
" 41 Path of dataset file.\n", | |||
" 42 estimator : function\n", | |||
" 43 kernel function used to estimate. This function needs to return a gram matrix.\n", | |||
" 44 param_grid_precomputed : dictionary\n", | |||
" 45 Dictionary with names (string) of parameters used to calculate gram matrices as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n", | |||
" 46 param_grid : dictionary\n", | |||
" 47 Dictionary with names (string) of parameters used as penelties as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n", | |||
" 48 model_type : string\n", | |||
" 49 Typr of the problem, can be regression or classification.\n", | |||
" 50 NUM_TRIALS : integer\n", | |||
" 51 Number of random trials of outer cv loop. The default is 30.\n", | |||
" 52 datafile_y : string\n", | |||
" 53 Path of file storing y data. This parameter is optional depending on the given dataset file.\n", | |||
" 54 read_gm_from_file : boolean\n", | |||
" 55 Whether gram matrices are loaded from file.\n", | |||
" 56 \n", | |||
" 57 Examples\n", | |||
" 58 --------\n", | |||
" 59 >>> import numpy as np\n", | |||
" 60 >>> import sys\n", | |||
" 61 >>> sys.path.insert(0, \"../\")\n", | |||
" 62 >>> from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel\n", | |||
" 63 >>> from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", | |||
" 64 >>>\n", | |||
" 65 >>> datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", | |||
" 66 >>> estimator = weisfeilerlehmankernel\n", | |||
" 67 >>> param_grid_precomputed = {'height': [0,1,2,3,4,5,6,7,8,9,10], 'base_kernel': ['subtree']}\n", | |||
" 68 >>> param_grid = {\"alpha\": np.logspace(-2, 2, num = 10, base = 10)}\n", | |||
" 69 >>>\n", | |||
" 70 >>> model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, 'regression')\n", | |||
" 71 \"\"\"\n", | |||
" 72 115.1 MiB 0.0 MiB tqdm.monitor_interval = 0\n", | |||
" 73 \n", | |||
" 74 115.1 MiB 0.0 MiB results_dir = '../notebooks/results/' + estimator.__name__\n", | |||
" 75 115.1 MiB 0.0 MiB if not os.path.exists(results_dir):\n", | |||
" 76 os.makedirs(results_dir)\n", | |||
" 77 # a string to save all the results.\n", | |||
" 78 115.1 MiB 0.0 MiB str_fw = '###################### log time: ' + datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\") + '. ######################\\n\\n'\n", | |||
" 79 115.1 MiB 0.0 MiB str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\\n# including gram matrices, serial numbers for gram matrix figures and performance.\\n\\n'\n", | |||
" 80 \n", | |||
" 81 # setup the model type\n", | |||
" 82 115.1 MiB 0.0 MiB model_type = model_type.lower()\n", | |||
" 83 115.1 MiB 0.0 MiB if model_type != 'regression' and model_type != 'classification':\n", | |||
" 84 raise Exception(\n", | |||
" 85 'The model type is incorrect! Please choose from regression or classification.'\n", | |||
" 86 )\n", | |||
" 87 115.1 MiB 0.0 MiB print()\n", | |||
" 88 115.1 MiB 0.0 MiB print('--- This is a %s problem ---' % model_type)\n", | |||
" 89 115.1 MiB 0.0 MiB str_fw += 'This is a %s problem.\\n' % model_type\n", | |||
" 90 \n", | |||
" 91 # calculate gram matrices rather than read them from file.\n", | |||
" 92 115.1 MiB 0.0 MiB if read_gm_from_file == False:\n", | |||
" 93 # Load the dataset\n", | |||
" 94 115.1 MiB 0.0 MiB print()\n", | |||
" 95 115.1 MiB 0.0 MiB print('\\n1. Loading dataset from file...')\n", | |||
" 96 115.1 MiB 0.0 MiB if isinstance(datafile, str):\n", | |||
" 97 115.1 MiB 0.0 MiB dataset, y_all = loadDataset(\n", | |||
" 98 116.3 MiB 1.2 MiB datafile, filename_y=datafile_y, extra_params=extra_params)\n", | |||
" 99 else: # load data directly from variable.\n", | |||
" 100 dataset = datafile\n", | |||
" 101 y_all = datafile_y \n", | |||
" 102 \n", | |||
" 103 # import matplotlib.pyplot as plt\n", | |||
" 104 # import networkx as nx\n", | |||
" 105 # nx.draw_networkx(dataset[30])\n", | |||
" 106 # plt.show()\n", | |||
" 107 \n", | |||
" 108 # Grid of parameters with a discrete number of values for each.\n", | |||
" 109 116.3 MiB 0.0 MiB param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", | |||
" 110 116.3 MiB 0.0 MiB param_list = list(ParameterGrid(param_grid))\n", | |||
" 111 \n", | |||
" 112 116.3 MiB 0.0 MiB gram_matrices = [\n", | |||
" 113 ] # a list to store gram matrices for all param_grid_precomputed\n", | |||
" 114 116.3 MiB 0.0 MiB gram_matrix_time = [\n", | |||
" 115 ] # a list to store time to calculate gram matrices\n", | |||
" 116 116.3 MiB 0.0 MiB param_list_pre_revised = [\n", | |||
" 117 ] # list to store param grids precomputed ignoring the useless ones\n", | |||
" 118 \n", | |||
" 119 # calculate all gram matrices\n", | |||
" 120 116.3 MiB 0.0 MiB print()\n", | |||
" 121 116.3 MiB 0.0 MiB print('2. Calculating gram matrices. This could take a while...')\n", | |||
" 122 116.3 MiB 0.0 MiB str_fw += '\\nII. Gram matrices.\\n\\n'\n", | |||
" 123 116.3 MiB 0.0 MiB tts = time.time() # start training time\n", | |||
" 124 116.3 MiB 0.0 MiB nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN)\n", | |||
" 125 144.8 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed):\n", | |||
" 126 116.3 MiB 0.0 MiB y = y_all[:]\n", | |||
" 127 116.3 MiB 0.0 MiB params_out['n_jobs'] = n_jobs\n", | |||
" 128 # print(dataset)\n", | |||
" 129 # import networkx as nx\n", | |||
" 130 # nx.draw_networkx(dataset[1])\n", | |||
" 131 # plt.show()\n", | |||
" 132 119.1 MiB 2.8 MiB rtn_data = estimator(dataset[:], **params_out)\n", | |||
" 133 119.1 MiB 0.0 MiB Kmatrix = rtn_data[0]\n", | |||
" 134 119.1 MiB 0.0 MiB current_run_time = rtn_data[1]\n", | |||
" 135 # for some kernels, some graphs in datasets may not meet the \n", | |||
" 136 # kernels' requirements for graph structure. These graphs are trimmed. \n", | |||
" 137 119.1 MiB 0.0 MiB if len(rtn_data) == 3:\n", | |||
" 138 119.1 MiB 0.0 MiB idx_trim = rtn_data[2] # the index of trimmed graph list\n", | |||
" 139 119.1 MiB 0.0 MiB y = [y[idxt] for idxt in idx_trim] # trim y accordingly\n", | |||
" 140 # Kmatrix = np.random.rand(2250, 2250)\n", | |||
" 141 # current_run_time = 0.1\n", | |||
" 142 \n", | |||
" 143 119.1 MiB 0.0 MiB Kmatrix_diag = Kmatrix.diagonal().copy()\n", | |||
" 144 # remove graphs whose kernels with themselves are zeros\n", | |||
" 145 119.1 MiB 0.0 MiB nb_g_ignore = 0\n", | |||
" 146 119.1 MiB 0.0 MiB for idxk, diag in enumerate(Kmatrix_diag):\n", | |||
" 147 119.1 MiB 0.0 MiB if diag == 0:\n", | |||
" 148 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=0)\n", | |||
" 149 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=1)\n", | |||
" 150 nb_g_ignore += 1\n", | |||
" 151 # normalization\n", | |||
" 152 119.1 MiB 0.0 MiB for i in range(len(Kmatrix)):\n", | |||
" 153 119.1 MiB 0.0 MiB for j in range(i, len(Kmatrix)):\n", | |||
" 154 119.1 MiB 0.0 MiB Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])\n", | |||
" 155 119.1 MiB 0.0 MiB Kmatrix[j][i] = Kmatrix[i][j]\n", | |||
" 156 \n", | |||
" 157 119.1 MiB 0.0 MiB print()\n", | |||
" 158 119.1 MiB 0.0 MiB if params_out == {}:\n", | |||
" 159 print('the gram matrix is: ')\n", | |||
" 160 str_fw += 'the gram matrix is:\\n\\n'\n", | |||
" 161 else:\n", | |||
" 162 119.1 MiB 0.0 MiB print('the gram matrix with parameters', params_out, 'is: ')\n", | |||
" 163 119.1 MiB 0.0 MiB str_fw += 'the gram matrix with parameters %s is:\\n\\n' % params_out\n", | |||
" 164 119.1 MiB 0.0 MiB if len(Kmatrix) < 2:\n", | |||
" 165 nb_gm_ignore += 1\n", | |||
" 166 print('ignored, as at most only one of all its diagonal value is non-zero.')\n", | |||
" 167 str_fw += 'ignored, as at most only one of all its diagonal value is non-zero.\\n\\n'\n", | |||
" 168 else: \n", | |||
" 169 119.1 MiB 0.0 MiB if np.isnan(Kmatrix).any(\n", | |||
" 170 ): # if the matrix contains elements that are not numbers\n", | |||
" 171 nb_gm_ignore += 1\n", | |||
" 172 print('ignored, as it contains elements that are not numbers.')\n", | |||
" 173 str_fw += 'ignored, as it contains elements that are not numbers.\\n\\n'\n", | |||
" 174 else:\n", | |||
" 175 # print(Kmatrix)\n", | |||
" 176 119.1 MiB 0.0 MiB str_fw += np.array2string(\n", | |||
" 177 119.1 MiB 0.0 MiB Kmatrix,\n", | |||
" 178 119.1 MiB 0.0 MiB separator=',') + '\\n\\n'\n", | |||
" 179 # separator=',',\n", | |||
" 180 # threshold=np.inf,\n", | |||
" 181 # floatmode='unique') + '\\n\\n'\n", | |||
" 182 \n", | |||
" 183 119.1 MiB 0.0 MiB fig_file_name = results_dir + '/GM[ds]' + ds_name\n", | |||
" 184 119.1 MiB 0.0 MiB if params_out != {}:\n", | |||
" 185 119.1 MiB 0.0 MiB fig_file_name += '[params]' + str(idx)\n", | |||
" 186 119.8 MiB 0.7 MiB plt.imshow(Kmatrix)\n", | |||
" 187 119.9 MiB 0.1 MiB plt.colorbar()\n", | |||
" 188 144.8 MiB 24.9 MiB plt.savefig(fig_file_name + '.eps', format='eps', dpi=300)\n", | |||
" 189 # plt.show()\n", | |||
" 190 144.8 MiB 0.0 MiB plt.clf()\n", | |||
" 191 144.8 MiB 0.0 MiB gram_matrices.append(Kmatrix)\n", | |||
" 192 144.8 MiB 0.0 MiB gram_matrix_time.append(current_run_time)\n", | |||
" 193 144.8 MiB 0.0 MiB param_list_pre_revised.append(params_out)\n", | |||
" 194 144.8 MiB 0.0 MiB if nb_g_ignore > 0:\n", | |||
" 195 print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore)\n", | |||
" 196 str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore\n", | |||
" 197 144.8 MiB 0.0 MiB print()\n", | |||
" 198 144.8 MiB 0.0 MiB print(\n", | |||
" 199 144.8 MiB 0.0 MiB '{} gram matrices are calculated, {} of which are ignored.'.format(\n", | |||
" 200 144.8 MiB 0.0 MiB len(param_list_precomputed), nb_gm_ignore))\n", | |||
" 201 144.8 MiB 0.0 MiB str_fw += '{} gram matrices are calculated, {} of which are ignored.\\n\\n'.format(len(param_list_precomputed), nb_gm_ignore)\n", | |||
" 202 144.8 MiB 0.0 MiB str_fw += 'serial numbers of gram matrix figures and their corresponding parameters settings:\\n\\n'\n", | |||
" 203 144.8 MiB 0.0 MiB str_fw += ''.join([\n", | |||
" 204 144.8 MiB 0.0 MiB '{}: {}\\n'.format(idx, params_out)\n", | |||
" 205 144.8 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed)\n", | |||
" 206 ])\n", | |||
" 207 \n", | |||
" 208 144.8 MiB 0.0 MiB print()\n", | |||
" 209 144.8 MiB 0.0 MiB if len(gram_matrices) == 0:\n", | |||
" 210 print('all gram matrices are ignored, no results obtained.')\n", | |||
" 211 str_fw += '\\nall gram matrices are ignored, no results obtained.\\n\\n'\n", | |||
" 212 else:\n", | |||
" 213 # save gram matrices to file.\n", | |||
" 214 144.8 MiB 0.0 MiB np.savez(results_dir + '/' + ds_name + '.gm', \n", | |||
" 215 144.8 MiB 0.0 MiB gms=gram_matrices, params=param_list_pre_revised, y=y, \n", | |||
" 216 144.9 MiB 0.1 MiB gmtime=gram_matrix_time)\n", | |||
" 217 \n", | |||
" 218 144.9 MiB 0.0 MiB print(\n", | |||
" 219 144.9 MiB 0.0 MiB '3. Fitting and predicting using nested cross validation. This could really take a while...'\n", | |||
" 220 )\n", | |||
" 221 \n", | |||
" 222 # ---- use pool.imap_unordered to parallel and track progress. ----\n", | |||
" 223 # train_pref = []\n", | |||
" 224 # val_pref = []\n", | |||
" 225 # test_pref = []\n", | |||
" 226 # def func_assign(result, var_to_assign):\n", | |||
" 227 # for idx, itm in enumerate(var_to_assign):\n", | |||
" 228 # itm.append(result[idx]) \n", | |||
" 229 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 230 # \n", | |||
" 231 # parallel_me(trial_do_partial, range(NUM_TRIALS), func_assign, \n", | |||
" 232 # [train_pref, val_pref, test_pref], glbv=gram_matrices,\n", | |||
" 233 # method='imap_unordered', n_jobs=n_jobs, chunksize=1,\n", | |||
" 234 # itr_desc='cross validation')\n", | |||
" 235 \n", | |||
" 236 144.9 MiB 0.0 MiB def init_worker(gms_toshare):\n", | |||
" 237 global G_gms\n", | |||
" 238 G_gms = gms_toshare\n", | |||
" 239 \n", | |||
" 240 # gram_matrices = np.array(gram_matrices)\n", | |||
" 241 # gms_shape = gram_matrices.shape\n", | |||
" 242 # gms_array = Array('d', np.reshape(gram_matrices.copy(), -1, order='C'))\n", | |||
" 243 # pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gms_array, gms_shape))\n", | |||
" 244 144.9 MiB 0.1 MiB pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n", | |||
" 245 144.9 MiB 0.0 MiB trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 246 144.9 MiB 0.0 MiB train_pref = []\n", | |||
" 247 144.9 MiB 0.0 MiB val_pref = []\n", | |||
" 248 144.9 MiB 0.0 MiB test_pref = []\n", | |||
" 249 # if NUM_TRIALS < 1000 * n_jobs:\n", | |||
" 250 # chunksize = int(NUM_TRIALS / n_jobs) + 1\n", | |||
" 251 # else:\n", | |||
" 252 # chunksize = 1000\n", | |||
" 253 144.9 MiB 0.0 MiB chunksize = 1\n", | |||
" 254 145.1 MiB 0.1 MiB for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n", | |||
" 255 145.1 MiB 0.0 MiB train_pref.append(o1)\n", | |||
" 256 145.1 MiB 0.0 MiB val_pref.append(o2)\n", | |||
" 257 145.1 MiB 0.0 MiB test_pref.append(o3)\n", | |||
" 258 145.1 MiB 0.0 MiB pool.close()\n", | |||
" 259 145.1 MiB 0.0 MiB pool.join()\n", | |||
" 260 \n", | |||
" 261 # # ---- use pool.map to parallel. ----\n", | |||
" 262 # pool = Pool(n_jobs)\n", | |||
" 263 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y[0:250], model_type)\n", | |||
" 264 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n", | |||
" 265 # train_pref = [item[0] for item in result_perf]\n", | |||
" 266 # val_pref = [item[1] for item in result_perf]\n", | |||
" 267 # test_pref = [item[2] for item in result_perf]\n", | |||
" 268 \n", | |||
" 269 # # ---- direct running, normally use a single CPU core. ----\n", | |||
" 270 # train_pref = []\n", | |||
" 271 # val_pref = []\n", | |||
" 272 # test_pref = []\n", | |||
" 273 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n", | |||
" 274 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n", | |||
" 275 # train_pref.append(o1)\n", | |||
" 276 # val_pref.append(o2)\n", | |||
" 277 # test_pref.append(o3)\n", | |||
" 278 # print()\n", | |||
" 279 \n", | |||
" 280 145.1 MiB 0.0 MiB print()\n", | |||
" 281 145.1 MiB 0.0 MiB print('4. Getting final performance...')\n", | |||
" 282 145.1 MiB 0.0 MiB str_fw += '\\nIII. Performance.\\n\\n'\n", | |||
" 283 # averages and confidences of performances on outer trials for each combination of parameters\n", | |||
" 284 145.1 MiB 0.0 MiB average_train_scores = np.mean(train_pref, axis=0)\n", | |||
" 285 # print('val_pref: ', val_pref[0][0])\n", | |||
" 286 145.1 MiB 0.0 MiB average_val_scores = np.mean(val_pref, axis=0)\n", | |||
" 287 # print('test_pref: ', test_pref[0][0])\n", | |||
" 288 145.1 MiB 0.0 MiB average_perf_scores = np.mean(test_pref, axis=0)\n", | |||
" 289 # sample std is used here\n", | |||
" 290 145.1 MiB 0.0 MiB std_train_scores = np.std(train_pref, axis=0, ddof=1)\n", | |||
" 291 145.1 MiB 0.0 MiB std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", | |||
" 292 145.1 MiB 0.0 MiB std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", | |||
" 293 \n", | |||
" 294 145.1 MiB 0.0 MiB if model_type == 'regression':\n", | |||
" 295 145.1 MiB 0.0 MiB best_val_perf = np.amin(average_val_scores)\n", | |||
" 296 else:\n", | |||
" 297 best_val_perf = np.amax(average_val_scores)\n", | |||
" 298 # print('average_val_scores: ', average_val_scores)\n", | |||
" 299 # print('best_val_perf: ', best_val_perf)\n", | |||
" 300 # print()\n", | |||
" 301 145.1 MiB 0.0 MiB best_params_index = np.where(average_val_scores == best_val_perf)\n", | |||
" 302 # find smallest val std with best val perf.\n", | |||
" 303 best_val_stds = [\n", | |||
" 304 145.1 MiB 0.0 MiB std_val_scores[value][best_params_index[1][idx]]\n", | |||
" 305 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 306 ]\n", | |||
" 307 145.1 MiB 0.0 MiB min_val_std = np.amin(best_val_stds)\n", | |||
" 308 145.1 MiB 0.0 MiB best_params_index = np.where(std_val_scores == min_val_std)\n", | |||
" 309 best_params_out = [\n", | |||
" 310 145.1 MiB 0.0 MiB param_list_pre_revised[i] for i in best_params_index[0]\n", | |||
" 311 ]\n", | |||
" 312 145.1 MiB 0.0 MiB best_params_in = [param_list[i] for i in best_params_index[1]]\n", | |||
" 313 145.1 MiB 0.0 MiB print('best_params_out: ', best_params_out)\n", | |||
" 314 145.1 MiB 0.0 MiB print('best_params_in: ', best_params_in)\n", | |||
" 315 145.1 MiB 0.0 MiB print()\n", | |||
" 316 145.1 MiB 0.0 MiB print('best_val_perf: ', best_val_perf)\n", | |||
" 317 145.1 MiB 0.0 MiB print('best_val_std: ', min_val_std)\n", | |||
" 318 145.1 MiB 0.0 MiB str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n", | |||
" 319 145.1 MiB 0.0 MiB str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n", | |||
" 320 145.1 MiB 0.0 MiB str_fw += 'best_val_perf: %s\\n' % best_val_perf\n", | |||
" 321 145.1 MiB 0.0 MiB str_fw += 'best_val_std: %s\\n' % min_val_std\n", | |||
" 322 \n", | |||
" 323 # print(best_params_index)\n", | |||
" 324 # print(best_params_index[0])\n", | |||
" 325 # print(average_perf_scores)\n", | |||
" 326 final_performance = [\n", | |||
" 327 145.1 MiB 0.0 MiB average_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 328 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 329 ]\n", | |||
" 330 final_confidence = [\n", | |||
" 331 145.1 MiB 0.0 MiB std_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 332 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 333 ]\n", | |||
" 334 145.1 MiB 0.0 MiB print('final_performance: ', final_performance)\n", | |||
" 335 145.1 MiB 0.0 MiB print('final_confidence: ', final_confidence)\n", | |||
" 336 145.1 MiB 0.0 MiB str_fw += 'final_performance: %s\\n' % final_performance\n", | |||
" 337 145.1 MiB 0.0 MiB str_fw += 'final_confidence: %s\\n' % final_confidence\n", | |||
" 338 train_performance = [\n", | |||
" 339 145.1 MiB 0.0 MiB average_train_scores[value][best_params_index[1][idx]]\n", | |||
" 340 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 341 ]\n", | |||
" 342 train_std = [\n", | |||
" 343 145.1 MiB 0.0 MiB std_train_scores[value][best_params_index[1][idx]]\n", | |||
" 344 145.1 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 345 ]\n", | |||
" 346 145.1 MiB 0.0 MiB print('train_performance: %s' % train_performance)\n", | |||
" 347 145.1 MiB 0.0 MiB print('train_std: ', train_std)\n", | |||
" 348 145.1 MiB 0.0 MiB str_fw += 'train_performance: %s\\n' % train_performance\n", | |||
" 349 145.1 MiB 0.0 MiB str_fw += 'train_std: %s\\n\\n' % train_std\n", | |||
" 350 \n", | |||
" 351 145.1 MiB 0.0 MiB print()\n", | |||
" 352 145.1 MiB 0.0 MiB tt_total = time.time() - tts # training time for all hyper-parameters\n", | |||
" 353 145.1 MiB 0.0 MiB average_gram_matrix_time = np.mean(gram_matrix_time)\n", | |||
" 354 145.1 MiB 0.0 MiB std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n", | |||
" 355 best_gram_matrix_time = [\n", | |||
" 356 145.1 MiB 0.0 MiB gram_matrix_time[i] for i in best_params_index[0]\n", | |||
" 357 ]\n", | |||
" 358 145.1 MiB 0.0 MiB ave_bgmt = np.mean(best_gram_matrix_time)\n", | |||
" 359 145.1 MiB 0.0 MiB std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n", | |||
" 360 145.1 MiB 0.0 MiB print(\n", | |||
" 361 145.1 MiB 0.0 MiB 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n", | |||
" 362 145.1 MiB 0.0 MiB .format(average_gram_matrix_time, std_gram_matrix_time))\n", | |||
" 363 145.1 MiB 0.0 MiB print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n", | |||
" 364 145.1 MiB 0.0 MiB ave_bgmt, std_bgmt))\n", | |||
" 365 145.1 MiB 0.0 MiB print(\n", | |||
" 366 145.1 MiB 0.0 MiB 'total training time with all hyper-param choices: {:.2f}s'.format(\n", | |||
" 367 145.1 MiB 0.0 MiB tt_total))\n", | |||
" 368 145.1 MiB 0.0 MiB str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n", | |||
" 369 145.1 MiB 0.0 MiB str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n", | |||
" 370 145.1 MiB 0.0 MiB str_fw += 'total training time with all hyper-param choices: {:.2f}s\\n\\n'.format(tt_total)\n", | |||
" 371 \n", | |||
" 372 # # save results to file\n", | |||
" 373 # np.savetxt(results_name_pre + 'average_train_scores.dt',\n", | |||
" 374 # average_train_scores)\n", | |||
" 375 # np.savetxt(results_name_pre + 'average_val_scores', average_val_scores)\n", | |||
" 376 # np.savetxt(results_name_pre + 'average_perf_scores.dt',\n", | |||
" 377 # average_perf_scores)\n", | |||
" 378 # np.savetxt(results_name_pre + 'std_train_scores.dt', std_train_scores)\n", | |||
" 379 # np.savetxt(results_name_pre + 'std_val_scores.dt', std_val_scores)\n", | |||
" 380 # np.savetxt(results_name_pre + 'std_perf_scores.dt', std_perf_scores)\n", | |||
" 381 \n", | |||
" 382 # np.save(results_name_pre + 'best_params_index', best_params_index)\n", | |||
" 383 # np.save(results_name_pre + 'best_params_pre.dt', best_params_out)\n", | |||
" 384 # np.save(results_name_pre + 'best_params_in.dt', best_params_in)\n", | |||
" 385 # np.save(results_name_pre + 'best_val_perf.dt', best_val_perf)\n", | |||
" 386 # np.save(results_name_pre + 'best_val_std.dt', best_val_std)\n", | |||
" 387 # np.save(results_name_pre + 'final_performance.dt', final_performance)\n", | |||
" 388 # np.save(results_name_pre + 'final_confidence.dt', final_confidence)\n", | |||
" 389 # np.save(results_name_pre + 'train_performance.dt', train_performance)\n", | |||
" 390 # np.save(results_name_pre + 'train_std.dt', train_std)\n", | |||
" 391 \n", | |||
" 392 # np.save(results_name_pre + 'gram_matrix_time.dt', gram_matrix_time)\n", | |||
" 393 # np.save(results_name_pre + 'average_gram_matrix_time.dt',\n", | |||
" 394 # average_gram_matrix_time)\n", | |||
" 395 # np.save(results_name_pre + 'std_gram_matrix_time.dt',\n", | |||
" 396 # std_gram_matrix_time)\n", | |||
" 397 # np.save(results_name_pre + 'best_gram_matrix_time.dt',\n", | |||
" 398 # best_gram_matrix_time)\n", | |||
" 399 \n", | |||
" 400 # print out as table.\n", | |||
" 401 145.1 MiB 0.0 MiB from collections import OrderedDict\n", | |||
" 402 145.1 MiB 0.0 MiB from tabulate import tabulate\n", | |||
" 403 145.1 MiB 0.0 MiB table_dict = {}\n", | |||
" 404 145.1 MiB 0.0 MiB if model_type == 'regression':\n", | |||
" 405 145.1 MiB 0.0 MiB for param_in in param_list:\n", | |||
" 406 145.1 MiB 0.0 MiB param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n", | |||
" 407 else:\n", | |||
" 408 for param_in in param_list:\n", | |||
" 409 param_in['C'] = '{:.2e}'.format(param_in['C'])\n", | |||
" 410 145.1 MiB 0.0 MiB table_dict['params'] = [{**param_out, **param_in}\n", | |||
" 411 145.1 MiB 0.0 MiB for param_in in param_list for param_out in param_list_pre_revised]\n", | |||
" 412 table_dict['gram_matrix_time'] = [\n", | |||
" 413 145.1 MiB 0.0 MiB '{:.2f}'.format(gram_matrix_time[index_out])\n", | |||
" 414 145.1 MiB 0.0 MiB for param_in in param_list\n", | |||
" 415 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 416 ]\n", | |||
" 417 table_dict['valid_perf'] = [\n", | |||
" 418 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n", | |||
" 419 std_val_scores[index_out][index_in])\n", | |||
" 420 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 421 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 422 ]\n", | |||
" 423 table_dict['test_perf'] = [\n", | |||
" 424 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n", | |||
" 425 std_perf_scores[index_out][index_in])\n", | |||
" 426 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 427 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 428 ]\n", | |||
" 429 table_dict['train_perf'] = [\n", | |||
" 430 145.1 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n", | |||
" 431 std_train_scores[index_out][index_in])\n", | |||
" 432 145.1 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 433 145.1 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 434 ]\n", | |||
" 435 keyorder = [\n", | |||
" 436 145.1 MiB 0.0 MiB 'params', 'train_perf', 'valid_perf', 'test_perf',\n", | |||
" 437 145.1 MiB 0.0 MiB 'gram_matrix_time'\n", | |||
" 438 ]\n", | |||
" 439 145.1 MiB 0.0 MiB print()\n", | |||
" 440 145.1 MiB 0.0 MiB tb_print = tabulate(\n", | |||
" 441 145.1 MiB 0.0 MiB OrderedDict(\n", | |||
" 442 145.1 MiB 0.0 MiB sorted(table_dict.items(),\n", | |||
" 443 145.1 MiB 0.0 MiB key=lambda i: keyorder.index(i[0]))),\n", | |||
" 444 145.1 MiB 0.0 MiB headers='keys')\n", | |||
" 445 # print(tb_print)\n", | |||
" 446 145.1 MiB 0.0 MiB str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n", | |||
" 447 \n", | |||
" 448 # read gram matrices from file.\n", | |||
" 449 else: \n", | |||
" 450 # Grid of parameters with a discrete number of values for each.\n", | |||
" 451 # param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", | |||
" 452 param_list = list(ParameterGrid(param_grid))\n", | |||
" 453 \n", | |||
" 454 # read gram matrices from file.\n", | |||
" 455 print()\n", | |||
" 456 print('2. Reading gram matrices from file...')\n", | |||
" 457 str_fw += '\\nII. Gram matrices.\\n\\nGram matrices are read from file, see last log for detail.\\n'\n", | |||
" 458 gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')\n", | |||
" 459 gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed\n", | |||
" 460 gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices\n", | |||
" 461 param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones\n", | |||
" 462 y = gmfile['y'].tolist()\n", | |||
" 463 \n", | |||
" 464 tts = time.time() # start training time\n", | |||
" 465 # nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN) \n", | |||
" 466 print(\n", | |||
" 467 '3. Fitting and predicting using nested cross validation. This could really take a while...'\n", | |||
" 468 )\n", | |||
" 469 \n", | |||
" 470 # ---- use pool.imap_unordered to parallel and track progress. ----\n", | |||
" 471 def init_worker(gms_toshare):\n", | |||
" 472 global G_gms\n", | |||
" 473 G_gms = gms_toshare\n", | |||
" 474 \n", | |||
" 475 pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n", | |||
" 476 trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 477 train_pref = []\n", | |||
" 478 val_pref = []\n", | |||
" 479 test_pref = []\n", | |||
" 480 chunksize = 1\n", | |||
" 481 for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n", | |||
" 482 train_pref.append(o1)\n", | |||
" 483 val_pref.append(o2)\n", | |||
" 484 test_pref.append(o3)\n", | |||
" 485 pool.close()\n", | |||
" 486 pool.join()\n", | |||
" 487 \n", | |||
" 488 # # ---- use pool.map to parallel. ----\n", | |||
" 489 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n", | |||
" 490 # train_pref = [item[0] for item in result_perf]\n", | |||
" 491 # val_pref = [item[1] for item in result_perf]\n", | |||
" 492 # test_pref = [item[2] for item in result_perf]\n", | |||
" 493 \n", | |||
" 494 # # ---- use joblib.Parallel to parallel and track progress. ----\n", | |||
" 495 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)\n", | |||
" 496 # result_perf = Parallel(n_jobs=n_jobs, verbose=10)(delayed(trial_do_partial)(trial) for trial in range(NUM_TRIALS))\n", | |||
" 497 # train_pref = [item[0] for item in result_perf]\n", | |||
" 498 # val_pref = [item[1] for item in result_perf]\n", | |||
" 499 # test_pref = [item[2] for item in result_perf]\n", | |||
" 500 \n", | |||
" 501 # # ---- direct running, normally use a single CPU core. ----\n", | |||
" 502 # train_pref = []\n", | |||
" 503 # val_pref = []\n", | |||
" 504 # test_pref = []\n", | |||
" 505 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n", | |||
" 506 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n", | |||
" 507 # train_pref.append(o1)\n", | |||
" 508 # val_pref.append(o2)\n", | |||
" 509 # test_pref.append(o3)\n", | |||
" 510 \n", | |||
" 511 print()\n", | |||
" 512 print('4. Getting final performance...')\n", | |||
" 513 str_fw += '\\nIII. Performance.\\n\\n'\n", | |||
" 514 # averages and confidences of performances on outer trials for each combination of parameters\n", | |||
" 515 average_train_scores = np.mean(train_pref, axis=0)\n", | |||
" 516 average_val_scores = np.mean(val_pref, axis=0)\n", | |||
" 517 average_perf_scores = np.mean(test_pref, axis=0)\n", | |||
" 518 # sample std is used here\n", | |||
" 519 std_train_scores = np.std(train_pref, axis=0, ddof=1)\n", | |||
" 520 std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", | |||
" 521 std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", | |||
" 522 \n", | |||
" 523 if model_type == 'regression':\n", | |||
" 524 best_val_perf = np.amin(average_val_scores)\n", | |||
" 525 else:\n", | |||
" 526 best_val_perf = np.amax(average_val_scores)\n", | |||
" 527 best_params_index = np.where(average_val_scores == best_val_perf)\n", | |||
" 528 # find smallest val std with best val perf.\n", | |||
" 529 best_val_stds = [\n", | |||
" 530 std_val_scores[value][best_params_index[1][idx]]\n", | |||
" 531 for idx, value in enumerate(best_params_index[0])\n", | |||
" 532 ]\n", | |||
" 533 min_val_std = np.amin(best_val_stds)\n", | |||
" 534 best_params_index = np.where(std_val_scores == min_val_std)\n", | |||
" 535 best_params_out = [\n", | |||
" 536 param_list_pre_revised[i] for i in best_params_index[0]\n", | |||
" 537 ]\n", | |||
" 538 best_params_in = [param_list[i] for i in best_params_index[1]]\n", | |||
" 539 print('best_params_out: ', best_params_out)\n", | |||
" 540 print('best_params_in: ', best_params_in)\n", | |||
" 541 print()\n", | |||
" 542 print('best_val_perf: ', best_val_perf)\n", | |||
" 543 print('best_val_std: ', min_val_std)\n", | |||
" 544 str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n", | |||
" 545 str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n", | |||
" 546 str_fw += 'best_val_perf: %s\\n' % best_val_perf\n", | |||
" 547 str_fw += 'best_val_std: %s\\n' % min_val_std\n", | |||
" 548 \n", | |||
" 549 final_performance = [\n", | |||
" 550 average_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 551 for idx, value in enumerate(best_params_index[0])\n", | |||
" 552 ]\n", | |||
" 553 final_confidence = [\n", | |||
" 554 std_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 555 for idx, value in enumerate(best_params_index[0])\n", | |||
" 556 ]\n", | |||
" 557 print('final_performance: ', final_performance)\n", | |||
" 558 print('final_confidence: ', final_confidence)\n", | |||
" 559 str_fw += 'final_performance: %s\\n' % final_performance\n", | |||
" 560 str_fw += 'final_confidence: %s\\n' % final_confidence\n", | |||
" 561 train_performance = [\n", | |||
" 562 average_train_scores[value][best_params_index[1][idx]]\n", | |||
" 563 for idx, value in enumerate(best_params_index[0])\n", | |||
" 564 ]\n", | |||
" 565 train_std = [\n", | |||
" 566 std_train_scores[value][best_params_index[1][idx]]\n", | |||
" 567 for idx, value in enumerate(best_params_index[0])\n", | |||
" 568 ]\n", | |||
" 569 print('train_performance: %s' % train_performance)\n", | |||
" 570 print('train_std: ', train_std)\n", | |||
" 571 str_fw += 'train_performance: %s\\n' % train_performance\n", | |||
" 572 str_fw += 'train_std: %s\\n\\n' % train_std\n", | |||
" 573 \n", | |||
" 574 print()\n", | |||
" 575 average_gram_matrix_time = np.mean(gram_matrix_time)\n", | |||
" 576 std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n", | |||
" 577 best_gram_matrix_time = [\n", | |||
" 578 gram_matrix_time[i] for i in best_params_index[0]\n", | |||
" 579 ]\n", | |||
" 580 ave_bgmt = np.mean(best_gram_matrix_time)\n", | |||
" 581 std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n", | |||
" 582 print(\n", | |||
" 583 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n", | |||
" 584 .format(average_gram_matrix_time, std_gram_matrix_time))\n", | |||
" 585 print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n", | |||
" 586 ave_bgmt, std_bgmt))\n", | |||
" 587 tt_poster = time.time() - tts # training time with hyper-param choices who did not participate in calculation of gram matrices\n", | |||
" 588 print(\n", | |||
" 589 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s'.format(\n", | |||
" 590 tt_poster))\n", | |||
" 591 print('total training time with all hyper-param choices: {:.2f}s'.format(\n", | |||
" 592 tt_poster + np.sum(gram_matrix_time)))\n", | |||
" 593 # str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n", | |||
" 594 # str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n", | |||
" 595 str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\\n\\n'.format(tt_poster)\n", | |||
" 596 \n", | |||
" 597 # print out as table.\n", | |||
" 598 from collections import OrderedDict\n", | |||
" 599 from tabulate import tabulate\n", | |||
" 600 table_dict = {}\n", | |||
" 601 if model_type == 'regression':\n", | |||
" 602 for param_in in param_list:\n", | |||
" 603 param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n", | |||
" 604 else:\n", | |||
" 605 for param_in in param_list:\n", | |||
" 606 param_in['C'] = '{:.2e}'.format(param_in['C'])\n", | |||
" 607 table_dict['params'] = [{**param_out, **param_in}\n", | |||
" 608 for param_in in param_list for param_out in param_list_pre_revised]\n", | |||
" 609 # table_dict['gram_matrix_time'] = [\n", | |||
" 610 # '{:.2f}'.format(gram_matrix_time[index_out])\n", | |||
" 611 # for param_in in param_list\n", | |||
" 612 # for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 613 # ]\n", | |||
" 614 table_dict['valid_perf'] = [\n", | |||
" 615 '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n", | |||
" 616 std_val_scores[index_out][index_in])\n", | |||
" 617 for index_in, _ in enumerate(param_list)\n", | |||
" 618 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 619 ]\n", | |||
" 620 table_dict['test_perf'] = [\n", | |||
" 621 '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n", | |||
" 622 std_perf_scores[index_out][index_in])\n", | |||
" 623 for index_in, _ in enumerate(param_list)\n", | |||
" 624 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 625 ]\n", | |||
" 626 table_dict['train_perf'] = [\n", | |||
" 627 '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n", | |||
" 628 std_train_scores[index_out][index_in])\n", | |||
" 629 for index_in, _ in enumerate(param_list)\n", | |||
" 630 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 631 ]\n", | |||
" 632 keyorder = [\n", | |||
" 633 'params', 'train_perf', 'valid_perf', 'test_perf'\n", | |||
" 634 ]\n", | |||
" 635 print()\n", | |||
" 636 tb_print = tabulate(\n", | |||
" 637 OrderedDict(\n", | |||
" 638 sorted(table_dict.items(),\n", | |||
" 639 key=lambda i: keyorder.index(i[0]))),\n", | |||
" 640 headers='keys')\n", | |||
" 641 # print(tb_print)\n", | |||
" 642 str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n", | |||
" 643 \n", | |||
" 644 # open file to save all results for this dataset.\n", | |||
" 645 if not os.path.exists(results_dir):\n", | |||
" 646 os.makedirs(results_dir)\n", | |||
" 647 \n", | |||
" 648 # open file to save all results for this dataset.\n", | |||
" 649 145.1 MiB 0.0 MiB if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):\n", | |||
" 650 with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:\n", | |||
" 651 f.write(str_fw)\n", | |||
" 652 else:\n", | |||
" 653 145.1 MiB 0.0 MiB with open(results_dir + '/' + ds_name + '.output.txt', 'r+') as f:\n", | |||
" 654 145.1 MiB 0.0 MiB content = f.read()\n", | |||
" 655 145.1 MiB 0.0 MiB f.seek(0, 0)\n", | |||
" 656 145.1 MiB 0.0 MiB f.write(str_fw + '\\n\\n\\n' + content)\n", | |||
"\n", | |||
"\n", | |||
"\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"import functools\n", | |||
"from libs import *\n", | |||
"import multiprocessing\n", | |||
"\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
"#from pygraph.utils.model_selection_precomputed import trial_do\n", | |||
"\n", | |||
"dslist = [\n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
" 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
"# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
"# # contains single node graph, node symb\n", | |||
"# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
"# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
"# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
"# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
"# # node nsymb\n", | |||
"# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
"# # node symb/nsymb\n", | |||
"# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
"# # node/edge symb\n", | |||
"# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
"\n", | |||
" # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
" # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
" #\n", | |||
" # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
" # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
" # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"\n", | |||
" # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
" # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
"\n", | |||
" # # not working below\n", | |||
" # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
" # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n", | |||
" # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n", | |||
" # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"estimator = spkernel\n", | |||
"mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n", | |||
"param_grid_precomputed = {'node_kernels': [\n", | |||
" {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
"\n", | |||
"for ds in dslist:\n", | |||
" print()\n", | |||
" print(ds['name'])\n", | |||
" model_selection_for_precomputed_kernel(\n", | |||
" ds['dataset'],\n", | |||
" estimator,\n", | |||
" param_grid_precomputed,\n", | |||
" (param_grid[1] if ('task' in ds and ds['task']\n", | |||
" == 'regression') else param_grid[0]),\n", | |||
" (ds['task'] if 'task' in ds else 'classification'),\n", | |||
" NUM_TRIALS=30,\n", | |||
" datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n", | |||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | |||
" ds_name=ds['name'],\n", | |||
" n_jobs=multiprocessing.cpu_count(),\n", | |||
" read_gm_from_file=False)\n", | |||
" print()" | |||
] | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.7" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 2 | |||
} |
@@ -0,0 +1,177 @@ | |||
###################### log time: 2019-03-26 10:59:51. ###################### | |||
# This file contains results of spkernel on dataset Acyclic, | |||
# including gram matrices, serial numbers for gram matrix figures and performance. | |||
This is a regression problem. | |||
II. Gram matrices. | |||
the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8} is: | |||
[[1. ,0.47140452,0.33333333,...,0.30151134,0.30512858,0.27852425], | |||
[0.47140452,1. ,0. ,...,0.14213381,0.11986583,0.17232809], | |||
[0.33333333,0. ,1. ,...,0.36851387,0.37293493,0.34815531], | |||
..., | |||
[0.30151134,0.14213381,0.36851387,...,1. ,0.96429344,0.95175317], | |||
[0.30512858,0.11986583,0.37293493,...,0.96429344,1. ,0.96671243], | |||
[0.27852425,0.17232809,0.34815531,...,0.95175317,0.96671243,1. ]] | |||
1 gram matrix was calculated; 0 were ignored.
serial numbers of gram matrix figures and their corresponding parameter settings:
0: {'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8} | |||
III. Performance. | |||
best settings of hyper-params to build gram matrix: [{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8}] | |||
best settings of other hyper-params: [{'alpha': 1e-06}] | |||
best_val_perf: 9.55244065682399 | |||
best_val_std: 0.5574811966683159 | |||
final_performance: [9.724426192585643] | |||
final_confidence: [2.999822095078807] | |||
train_performance: [6.141755071354953] | |||
train_std: [0.2732168016478284] | |||
time to calculate gram matrix with different hyper-params: 16.95±nan s
time to calculate best gram matrix: 16.95±nan s
total training time with all hyper-param choices: 32.74s | |||
table of performance v.s. hyper-params: | |||
params train_perf valid_perf test_perf gram_matrix_time | |||
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ------------ ------------ ----------- ------------------ | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-10'} 6.14±0.28 9.70±0.61 9.74±3.00 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-10'} 6.13±0.27 9.75±0.74 9.74±3.03 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-09'} 6.14±0.28 9.68±0.45 9.74±3.04 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-09'} 6.14±0.28 9.75±0.55 9.76±2.99 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-08'} 6.14±0.28 9.60±0.65 9.71±2.99 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-08'} 6.14±0.27 9.74±0.64 9.74±3.00 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-07'} 6.14±0.28 9.60±0.66 9.73±2.98 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-07'} 6.14±0.28 9.77±0.65 9.77±3.07 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-06'} 6.14±0.27 9.55±0.56 9.72±3.00 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-06'} 6.13±0.27 9.79±0.61 9.73±3.04 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-05'} 6.14±0.27 9.68±0.57 9.75±3.01 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-05'} 6.14±0.27 9.75±0.57 9.70±3.02 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-04'} 6.14±0.27 9.56±0.56 9.69±2.98 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-04'} 6.15±0.27 9.62±0.65 9.70±2.97 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-03'} 6.19±0.27 9.65±0.74 9.69±2.98 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-03'} 6.36±0.27 9.73±0.46 9.71±2.92 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-02'} 6.80±0.25 9.90±0.52 9.93±2.98 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-02'} 7.63±0.25 10.33±0.57 10.29±3.01 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e-01'} 9.25±0.25 11.41±0.56 11.29±2.90 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e-01'} 12.42±0.25 14.03±0.34 14.06±2.65 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+00'} 17.48±0.24 18.67±0.35 19.06±2.33 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+00'} 24.52±0.21 25.24±0.31 26.11±2.41 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+01'} 34.07±0.20 34.29±0.31 35.50±4.09 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+01'} 48.90±0.28 48.62±0.40 49.78±7.09 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+02'} 75.87±0.52 75.45±0.68 76.11±9.09 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+02'} 107.85±0.80 107.50±0.87 107.80±9.36 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+03'} 128.21±0.96 127.84±1.04 128.07±9.24 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+03'} 136.81±1.03 136.43±1.13 136.63±9.17 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+04'} 139.82±1.05 139.40±1.13 139.63±9.14 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+04'} 140.80±1.05 140.41±1.07 140.61±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+05'} 141.12±1.06 140.71±1.04 140.92±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+05'} 141.22±1.06 140.84±1.12 141.02±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+06'} 141.25±1.06 140.79±1.12 141.06±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+06'} 141.26±1.06 140.87±1.06 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+07'} 141.26±1.06 140.85±1.07 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+07'} 141.26±1.06 140.79±1.05 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+08'} 141.26±1.06 140.79±1.17 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+08'} 141.26±1.06 140.86±1.08 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+09'} 141.26±1.06 140.93±1.06 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '3.16e+09'} 141.26±1.06 140.85±1.13 141.07±9.13 16.95 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8, 'alpha': '1.00e+10'} 141.26±1.06 140.80±1.07 141.07±9.13 16.95 | |||
###################### log time: 2019-03-26 10:58:24. ###################### | |||
# This file contains results of spkernel on dataset Acyclic, | |||
# including gram matrices, serial numbers for gram matrix figures and performance. | |||
This is a regression problem. | |||
II. Gram matrices. | |||
the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8} is: | |||
[[1. ,0.47140452,0.33333333,...,0.30151134,0.30512858,0.27852425], | |||
[0.47140452,1. ,0. ,...,0.14213381,0.11986583,0.17232809], | |||
[0.33333333,0. ,1. ,...,0.36851387,0.37293493,0.34815531], | |||
..., | |||
[0.30151134,0.14213381,0.36851387,...,1. ,0.96429344,0.95175317], | |||
[0.30512858,0.11986583,0.37293493,...,0.96429344,1. ,0.96671243], | |||
[0.27852425,0.17232809,0.34815531,...,0.95175317,0.96671243,1. ]] | |||
1 gram matrix was calculated; 0 were ignored.
serial numbers of gram matrix figures and their corresponding parameter settings:
0: {'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8} | |||
III. Performance. | |||
best settings of hyper-params to build gram matrix: [{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8}] | |||
best settings of other hyper-params: [{'alpha': 1e-07}] | |||
best_val_perf: 9.574376867060177 | |||
best_val_std: 0.7335499737848491 | |||
final_performance: [9.50365754990661] | |||
final_confidence: [2.8602395698342087] | |||
train_performance: [6.17134653357633] | |||
train_std: [0.25758350163124855] | |||
time to calculate gram matrix with different hyper-params: 1.29±nan s
time to calculate best gram matrix: 1.29±nan s
total training time with all hyper-param choices: 5.19s | |||
table of performance v.s. hyper-params: | |||
params train_perf valid_perf test_perf gram_matrix_time | |||
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ------------ ------------ ----------- ------------------ | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-10'} 6.16±0.26 9.75±0.65 9.54±2.84 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-10'} 6.16±0.26 9.75±0.66 9.53±2.90 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-09'} 6.17±0.27 9.78±0.61 9.50±2.82 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-09'} 6.16±0.26 9.79±0.56 9.53±2.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-08'} 6.17±0.26 9.70±0.58 9.52±2.84 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-08'} 6.16±0.25 9.81±0.68 9.52±2.82 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-07'} 6.17±0.26 9.57±0.73 9.50±2.86 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-07'} 6.16±0.26 9.95±0.70 9.51±2.86 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-06'} 6.17±0.26 9.81±0.58 9.54±2.88 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-06'} 6.16±0.26 9.74±0.70 9.53±2.94 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-05'} 6.17±0.26 9.71±0.61 9.54±2.92 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-05'} 6.17±0.26 9.69±0.61 9.51±2.88 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-04'} 6.17±0.26 9.72±0.70 9.50±2.79 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-04'} 6.18±0.26 9.62±0.73 9.42±2.85 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-03'} 6.21±0.26 9.91±0.52 9.40±2.78 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-03'} 6.39±0.25 9.86±0.64 9.42±2.79 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-02'} 6.83±0.25 9.94±0.56 9.59±2.80 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-02'} 7.66±0.24 10.30±0.45 9.99±2.69 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e-01'} 9.28±0.24 11.38±0.36 11.02±2.55 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e-01'} 12.45±0.22 14.06±0.38 13.79±2.36 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+00'} 17.53±0.21 18.74±0.31 18.88±2.23 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+00'} 24.57±0.19 25.32±0.28 26.29±2.72 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+01'} 34.07±0.22 34.30±0.34 36.29±4.52 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+01'} 48.85±0.34 48.65±0.41 51.21±7.45 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+02'} 75.76±0.57 75.36±0.60 77.93±9.56 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+02'} 107.68±0.86 107.24±0.95 109.70±9.96 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+03'} 128.01±1.04 127.59±1.03 129.96±9.91 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+03'} 136.59±1.11 136.19±1.20 138.51±9.86 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+04'} 139.60±1.14 139.22±1.11 141.51±9.84 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+04'} 140.58±1.15 140.22±1.21 142.49±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+05'} 140.89±1.15 140.48±1.14 142.80±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+05'} 140.99±1.15 140.54±1.17 142.90±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+06'} 141.02±1.15 140.61±1.20 142.93±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+06'} 141.04±1.15 140.65±1.23 142.94±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+07'} 141.04±1.15 140.66±1.20 142.94±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+07'} 141.04±1.15 140.64±1.24 142.94±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+08'} 141.04±1.15 140.65±1.14 142.95±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+08'} 141.04±1.15 140.61±1.22 142.95±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+09'} 141.04±1.15 140.58±1.15 142.95±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '3.16e+09'} 141.04±1.15 140.71±1.17 142.95±9.83 1.29 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7f5619e77598>, 'nsymb': <function gaussiankernel at 0x7f5619e77620>, 'mix': functools.partial(<function kernelproduct at 0x7f5619e77730>, <function deltakernel at 0x7f5619e77598>, <function gaussiankernel at 0x7f5619e77620>)}, 'n_jobs': 8, 'alpha': '1.00e+10'} 141.04±1.15 140.68±1.11 142.95±9.83 1.29 | |||
@@ -0,0 +1,67 @@ | |||
###################### log time: 2019-03-26 11:56:19. ###################### | |||
# This file contains results of spkernel on dataset ds-unknown, | |||
# including gram matrices, serial numbers for gram matrix figures and performance. | |||
This is a regression problem. | |||
II. Gram matrices. | |||
the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1} is: | |||
[[1. ,0.47140452,0.33333333,...,0.30151134,0.30512858,0.27852425], | |||
[0.47140452,1. ,0. ,...,0.14213381,0.11986583,0.17232809], | |||
[0.33333333,0. ,1. ,...,0.36851387,0.37293493,0.34815531], | |||
..., | |||
[0.30151134,0.14213381,0.36851387,...,1. ,0.96429344,0.95175317], | |||
[0.30512858,0.11986583,0.37293493,...,0.96429344,1. ,0.96671243], | |||
[0.27852425,0.17232809,0.34815531,...,0.95175317,0.96671243,1. ]] | |||
1 gram matrices are calculated, 0 of which are ignored. | |||
serial numbers of gram matrix figures and their corresponding parameters settings: | |||
0: {'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1} | |||
III. Performance. | |||
best settings of hyper-params to build gram matrix: [{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1}] | |||
best settings of other hyper-params: [{'alpha': 0.0001}] | |||
best_val_perf: 9.922073568477266 | |||
best_val_std: 0.3829108688812842 | |||
final_performance: [8.039190309451554] | |||
final_confidence: [2.8576078550320037] | |||
train_performance: [6.285008316076738] | |||
train_std: [0.23613211181729038] | |||
time to calculate gram matrix with different hyper-params: 3.52±nans | |||
time to calculate best gram matrix: 3.52±nans | |||
total training time with all hyper-param choices: 4.34s | |||
table of performance v.s. hyper-params: | |||
params train_perf valid_perf test_perf gram_matrix_time | |||
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ------------ ------------ ------------ ------------------ | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-05'} 6.26±0.24 10.65±0.66 8.29±3.21 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-05'} 6.28±0.25 10.69±0.03 8.15±3.02 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-04'} 6.29±0.24 9.92±0.38 8.04±2.86 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-04'} 6.29±0.28 10.29±0.77 7.97±2.94 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-03'} 6.34±0.25 10.16±0.93 8.02±3.04 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-03'} 6.53±0.24 10.08±0.24 7.82±3.10 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-02'} 6.95±0.25 10.54±0.05 8.02±3.58 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-02'} 7.77±0.33 10.76±0.14 8.60±4.14 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e-01'} 9.34±0.35 11.60±0.14 10.01±4.61 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e-01'} 12.51±0.31 14.52±0.68 13.44±4.70 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+00'} 17.59±0.32 18.61±0.28 19.80±5.18 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+00'} 24.46±0.39 25.24±0.56 28.52±6.10 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+01'} 33.85±0.38 34.04±0.04 39.01±8.31 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+01'} 48.65±0.49 48.14±0.20 54.40±12.56 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+02'} 75.53±0.93 75.24±1.32 81.83±16.62 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+02'} 107.29±1.56 106.50±0.85 114.11±18.46 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+03'} 127.49±2.04 127.24±2.09 134.61±19.05 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+03'} 136.01±2.24 135.60±2.06 143.25±19.23 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+04'} 138.99±2.32 138.66±2.41 146.27±19.28 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '3.16e+04'} 139.97±2.35 139.63±2.70 147.26±19.30 3.52 | |||
{'node_kernels': {'symb': <function deltakernel at 0x7ff63c02c158>, 'nsymb': <function gaussiankernel at 0x7ff642e968c8>, 'mix': functools.partial(<function kernelproduct at 0x7ff60b9d21e0>, <function deltakernel at 0x7ff63c02c158>, <function gaussiankernel at 0x7ff642e968c8>)}, 'n_jobs': 1, 'alpha': '1.00e+05'} 140.28±2.35 139.84±2.38 147.58±19.30 3.52 | |||
@@ -1,170 +0,0 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": { | |||
"autoscroll": false, | |||
"ein.tags": "worksheet-0", | |||
"slideshow": { | |||
"slide_type": "-" | |||
} | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"import numpy as np\n", | |||
"import paths\n", | |||
"\n", | |||
"import pygraph\n", | |||
"\n", | |||
"from pygraph.utils.graphfiles import loadDataset\n" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 2, | |||
"metadata": { | |||
"autoscroll": false, | |||
"ein.tags": "worksheet-0", | |||
"slideshow": { | |||
"slide_type": "-" | |||
} | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"import networkx as nx\n", | |||
"import numpy as np\n", | |||
"import matplotlib.pyplot as plt\n", | |||
"\n", | |||
"# We load a ds dataset\n", | |||
"# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n", | |||
"dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 3, | |||
"metadata": { | |||
"autoscroll": false, | |||
"ein.tags": "worksheet-0", | |||
"slideshow": { | |||
"slide_type": "-" | |||
} | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n", | |||
"100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n", | |||
"100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n", | |||
"100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"#Compute graph edit distances\n", | |||
"\n", | |||
"from tqdm import tqdm\n", | |||
"from pygraph.c_ext.lsape_binders import lsap_solverHG\n", | |||
"from pygraph.ged.costfunctions import ConstantCostFunction\n", | |||
"from pygraph.ged.GED import ged\n", | |||
"import time\n", | |||
"\n", | |||
"cf = ConstantCostFunction(1,3,1,3)\n", | |||
"N=len(dataset)\n", | |||
"\n", | |||
"methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n", | |||
"ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n", | |||
"\n", | |||
"times = list()\n", | |||
"start = time.clock()\n", | |||
"for i in tqdm(range(0,N)):\n", | |||
" for j in range(0,N):\n", | |||
" ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n", | |||
"times.append(time.clock() - start)\n", | |||
"\n", | |||
"\n", | |||
"start = time.clock()\n", | |||
"for i in tqdm(range(0,N)):\n", | |||
" for j in range(0,N):\n", | |||
" ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n", | |||
"\n", | |||
"times.append(time.clock() - start)\n", | |||
"\n", | |||
"start = time.clock()\n", | |||
"for i in tqdm(range(0,N)):\n", | |||
" for j in range(0,N):\n", | |||
" ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n", | |||
"times.append(time.clock() - start)\n", | |||
"\n", | |||
"start = time.clock()\n", | |||
"for i in tqdm(range(0,N)):\n", | |||
" for j in range(0,N):\n", | |||
" ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n", | |||
"times.append(time.clock() - start)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 5, | |||
"metadata": { | |||
"autoscroll": false, | |||
"ein.tags": "worksheet-0", | |||
"slideshow": { | |||
"slide_type": "-" | |||
} | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
" method \t mean \t mean \t time\n", | |||
" Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n", | |||
" Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n", | |||
" Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n", | |||
" Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"print(\" method \\t mean \\t mean \\t time\")\n", | |||
"data = list()\n", | |||
"for i in range(0,len(ged_distances)):\n", | |||
" ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n", | |||
" print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.5.2" | |||
}, | |||
"name": "py-graph_test.ipynb" | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 2 | |||
} |
@@ -12,6 +12,109 @@ | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"Acyclic\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 183it [00:00, 5345.48it/s]\n", | |||
"calculating kernels: 16836it [00:01, 16066.90it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 183 built in 1.2855160236358643 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:03, 8.63it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 0.0001}]\n", | |||
"\n", | |||
"best_val_perf: 9.674788994813262\n", | |||
"best_val_std: 0.6229031522274688\n", | |||
"final_performance: [9.590999824754439]\n", | |||
"final_confidence: [2.911796096257332]\n", | |||
"train_performance: [6.16594412531739]\n", | |||
"train_std: [0.2739093211154806]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 1.29±nans\n", | |||
"time to calculate best gram matrix: 1.29±nans\n", | |||
"total training time with all hyper-param choices: 5.15s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"Alkane\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"\n", | |||
" 1 graphs are removed as they don't contain edges.\n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||
" keepdims=keepdims)\n", | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||
" ret = ret.dtype.type(ret / rcount)\n" | |||
] | |||
}, | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"getting sp graphs: 149it [00:00, 6510.18it/s]\n", | |||
"calculating kernels: 11175it [00:00, 18881.68it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 149 built in 0.8007419109344482 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:02, 10.52it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 3.162277660168379e-07}]\n", | |||
"\n", | |||
"best_val_perf: 8.784264102873752\n", | |||
"best_val_std: 0.2656887278835053\n", | |||
"final_performance: [8.059911355753659]\n", | |||
"final_confidence: [1.9620843656589473]\n", | |||
"train_performance: [7.8406202266920575]\n", | |||
"train_std: [0.2177862360087283]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 0.80±nans\n", | |||
"time to calculate best gram matrix: 0.80±nans\n", | |||
"total training time with all hyper-param choices: 4.02s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"MAO\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
@@ -23,17 +126,61 @@ | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 68it [00:00, 692.11it/s]\n", | |||
"calculating kernels: 2346it [00:05, 399.28it/s]\n", | |||
"getting sp graphs: 68it [00:00, 1095.77it/s]\n", | |||
"calculating kernels: 2346it [00:02, 813.63it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 68 built in 3.110588550567627 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 68 built in 6.345669507980347 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7fe240afd620>, 'nsymb': <function gaussiankernel at 0x7fe240afd9d8>, 'mix': functools.partial(<function kernelproduct at 0x7fe240aaf0d0>, <function deltakernel at 0x7fe240afd620>, <function gaussiankernel at 0x7fe240afd9d8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 7it [00:09, 4.67s/it]" | |||
"cross validation: 30it [00:02, 10.97it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'C': 3162.2776601683795}]\n", | |||
"\n", | |||
"best_val_perf: 0.8798412698412699\n", | |||
"best_val_std: 0.02062186442241262\n", | |||
"final_performance: [0.9042857142857144]\n", | |||
"final_confidence: [0.07343487734322982]\n", | |||
"train_performance: [0.9709180695847363]\n", | |||
"train_std: [0.005927396388634032]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 3.11±nans\n", | |||
"time to calculate best gram matrix: 3.11±nans\n", | |||
"total training time with all hyper-param choices: 6.21s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"PAH\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 94it [00:00, 2190.46it/s]\n", | |||
"calculating kernels: 4465it [00:05, 763.81it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 94 built in 6.083932399749756 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f4d3eb29620>, 'nsymb': <function gaussiankernel at 0x7f4d3eb296a8>, 'mix': functools.partial(<function kernelproduct at 0x7f4d3eb297b8>, <function deltakernel at 0x7f4d3eb29620>, <function gaussiankernel at 0x7f4d3eb296a8>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 0it [00:00, ?it/s]" | |||
] | |||
} | |||
], | |||
@@ -46,45 +193,46 @@ | |||
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
"#from pygraph.utils.model_selection_precomputed import trial_do\n", | |||
"\n", | |||
"# datasets\n", | |||
"dslist = [\n", | |||
"# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
"# 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
"# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
"# # contains single node graph, node symb\n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
" 'task': 'regression'}, # node symb\n", | |||
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
" # contains single node graph, node symb\n", | |||
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
"# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
"# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
"# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
"# # node nsymb\n", | |||
"# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
"# # node symb/nsymb\n", | |||
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" # node nsymb\n", | |||
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
" # node symb/nsymb\n", | |||
"\n", | |||
"# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
" # node/edge symb\n", | |||
"# # node/edge symb\n", | |||
"# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
"\n", | |||
" # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
" # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
" #\n", | |||
" # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
" # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
" # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"\n", | |||
" # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
" # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
"#\n", | |||
"# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
"# {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
"# {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
"# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
"# {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"#\n", | |||
"# {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
"# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
"# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
"# {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
"# 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
"\n", | |||
" # # not working below\n", | |||
" # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
@@ -93,12 +241,14 @@ | |||
" # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"estimator = spkernel\n", | |||
"# hyper-parameters\n", | |||
"mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n", | |||
"param_grid_precomputed = {'node_kernels': [\n", | |||
" {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
"\n", | |||
"# for each dataset, do model selection.\n", | |||
"for ds in dslist:\n", | |||
" print()\n", | |||
" print(ds['name'])\n", | |||
"    model_selection_for_precomputed_kernel(\n",
"        ds['dataset'],\n",
"        estimator,\n",
"        param_grid_precomputed,\n",
"        (param_grid[1] if ('task' in ds and ds['task']\n",
"         == 'regression') else param_grid[0]),\n",
"        (ds['task'] if 'task' in ds else 'classification'),\n",
"        NUM_TRIALS=30,\n",
"        datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n",
"        extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n",
" ds_name=ds['name'],\n", | |||
" n_jobs=multiprocessing.cpu_count(),\n", | |||
" read_gm_from_file=False)\n", | |||
" print()\n" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.\n", | |||
"[Parallel(n_jobs=8)]: Done 2 out of 9 | elapsed: 15.7s remaining: 54.8s\n", | |||
"[Parallel(n_jobs=8)]: Done 3 out of 9 | elapsed: 15.7s remaining: 31.3s\n", | |||
"[Parallel(n_jobs=8)]: Done 4 out of 9 | elapsed: 15.7s remaining: 19.6s\n", | |||
"[Parallel(n_jobs=8)]: Done 5 out of 9 | elapsed: 15.7s remaining: 12.5s\n", | |||
"[Parallel(n_jobs=8)]: Done 6 out of 9 | elapsed: 15.7s remaining: 7.8s\n", | |||
"[Parallel(n_jobs=8)]: Done 7 out of 9 | elapsed: 15.7s remaining: 4.5s\n", | |||
"[Parallel(n_jobs=8)]: Done 9 out of 9 | elapsed: 15.7s remaining: 0.0s\n" | |||
] | |||
}, | |||
{ | |||
"ename": "KeyboardInterrupt", | |||
"evalue": "", | |||
"output_type": "error", | |||
"traceback": [ | |||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |||
"\u001b[0;32m<ipython-input-1-ba0f5fe728f1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 83\u001b[0;31m \u001b[0mParallel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_cores\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdelayed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcompute_ds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mds\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdslist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |||
"\u001b[0;32m/usr/local/lib/python3.5/dist-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 960\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 961\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 962\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 963\u001b[0m \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/local/lib/python3.5/dist-packages/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 863\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 864\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 865\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 866\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/local/lib/python3.5/dist-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m 513\u001b[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m 514\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 515\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 516\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 517\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/local/lib/python3.5/dist-packages/joblib/externals/loky/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 426\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 428\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/lib/python3.5/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 293\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 294\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# # test parallel computing\n", | |||
"# import psutil\n", | |||
"# # logical=True counts threads, but we are interested in cores\n", | |||
"# psutil.()# .cpu_count(logical=False)\n", | |||
"%load_ext line_profiler\n", | |||
"%matplotlib inline\n", | |||
"import functools\n", | |||
"from libs import *\n", | |||
"from sklearn.metrics.pairwise import rbf_kernel\n", | |||
"from joblib import Parallel, delayed\n", | |||
"import multiprocessing\n", | |||
"\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"from pygraph.utils.kernels import deltakernel, kernelsum\n", | |||
"\n", | |||
"num_cores = multiprocessing.cpu_count()\n", | |||
"\n", | |||
"dslist = [ \n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',}, # unlabeled\n", | |||
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',}, # node/edge symb\n", | |||
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', \n", | |||
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',}, # contains single node graph, node symb\n", | |||
"# {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
" {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, # node/edge symb\n", | |||
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
"# {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
"# {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
"# {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"\n", | |||
"# {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
"# {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
" {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
"# {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
"# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
"# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
"# {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
"# 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
" \n", | |||
"# # not working below\n", | |||
"# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
"# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n", | |||
"# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n", | |||
"# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"estimator = spkernel\n", | |||
"mixkernel = functools.partial(kernelsum, deltakernel, rbf_kernel)\n", | |||
"param_grid_precomputed = {'node_kernels': [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num = 41, base = 10)}, \n", | |||
" {'alpha': np.logspace(-10, 10, num = 41, base = 10)}]\n", | |||
" \n", | |||
"def compute_ds(ds):\n", | |||
" print()\n", | |||
" print(ds['name'])\n", | |||
" model_selection_for_precomputed_kernel(\n", | |||
" ds['dataset'], estimator, param_grid_precomputed, \n", | |||
" (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \n", | |||
" (ds['task'] if 'task' in ds else 'classification'), NUM_TRIALS=30,\n", | |||
" datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n", | |||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | |||
" ds_name=ds['name'])\n", | |||
" \n", | |||
"# %lprun -f spkernel \\\n", | |||
"# model_selection_for_precomputed_kernel( \\\n", | |||
"# ds['dataset'], estimator, param_grid_precomputed, \\\n", | |||
"# (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \\\n", | |||
"# (ds['task'] if 'task' in ds else 'classification'), NUM_TRIALS=30, \\\n", | |||
"# datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), \\\n", | |||
"# extra_params=(ds['extra_params'] if 'extra_params' in ds else None))\n", | |||
" print()\n", | |||
" \n", | |||
"Parallel(n_jobs=num_cores, verbose=10)(delayed(compute_ds)(ds) for ds in dslist)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"I. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"ename": "TypeError", | |||
"evalue": "'NoneType' object is not subscriptable", | |||
"output_type": "error", | |||
"traceback": [ | |||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |||
"\u001b[0;31mRemoteTraceback\u001b[0m Traceback (most recent call last)", | |||
"\u001b[0;31mRemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n File \"/usr/lib/python3.5/multiprocessing/pool.py\", line 119, in worker\n result = (True, func(*args, **kwds))\n File \"/usr/lib/python3.5/multiprocessing/pool.py\", line 44, in mapstar\n return list(map(*args))\n File \"../pygraph/kernels/spKernel.py\", line 359, in spkernel_do\n kn = node_kernels['symb']\nTypeError: 'NoneType' object is not subscriptable\n\"\"\"", | |||
"\nThe above exception was the direct cause of the following exception:\n", | |||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |||
"\u001b[0;32m<ipython-input-1-b5a6e5aa5a44>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, \n\u001b[0;32m---> 15\u001b[0;31m 'regression', NUM_TRIALS=30)\n\u001b[0m", | |||
"\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/model_selection_precomputed.py\u001b[0m in \u001b[0;36mmodel_selection_for_precomputed_kernel\u001b[0;34m(datafile, estimator, param_grid_precomputed, param_grid, model_type, NUM_TRIALS, datafile_y, extra_params, ds_name, n_jobs)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams_out\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparam_list_precomputed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0mparams_out\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'n_jobs'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 122\u001b[0;31m \u001b[0mrtn_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mparams_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 123\u001b[0m \u001b[0mKmatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrtn_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[0mcurrent_run_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrtn_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(node_label, edge_weight, node_kernels, n_jobs, *args)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0mdo_partial\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpartial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspkernel_do\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mGn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mds_attrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode_label\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode_kernels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mitr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcombinations_with_replacement\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 99\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkernel\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpool\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdo_partial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtotal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 100\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[0mKmatrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/lib/python3.5/multiprocessing/pool.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, func, iterable, chunksize)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ma\u001b[0m \u001b[0mlist\u001b[0m \u001b[0mthat\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mreturned\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m '''\n\u001b[0;32m--> 260\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_map_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miterable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmapstar\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 261\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstarmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miterable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;32m/usr/lib/python3.5/multiprocessing/pool.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 606\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 607\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 608\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 609\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 610\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |||
"\u001b[0;31mTypeError\u001b[0m: 'NoneType' object is not subscriptable" | |||
] | |||
}, | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"Process ForkPoolWorker-1:\n", | |||
"Traceback (most recent call last):\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/process.py\", line 249, in _bootstrap\n", | |||
" self.run()\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/process.py\", line 93, in run\n", | |||
" self._target(*self._args, **self._kwargs)\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/pool.py\", line 108, in worker\n", | |||
" task = get()\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/queues.py\", line 343, in get\n", | |||
" res = self._reader.recv_bytes()\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/connection.py\", line 216, in recv_bytes\n", | |||
" buf = self._recv_bytes(maxlength)\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/connection.py\", line 407, in _recv_bytes\n", | |||
" buf = self._recv(4)\n", | |||
" File \"/usr/lib/python3.5/multiprocessing/connection.py\", line 379, in _recv\n", | |||
" chunk = read(handle, remaining)\n", | |||
"KeyboardInterrupt\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"%load_ext line_profiler\n", | |||
"%matplotlib inline\n", | |||
"import numpy as np\n", | |||
"import sys\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"\n", | |||
"datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", | |||
"estimator = spkernel\n", | |||
"param_grid_precomputed = {}\n", | |||
"param_grid = {'alpha': np.logspace(-1, 1, num = 41, base = 10)}\n", | |||
"\n", | |||
"model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, \n", | |||
" 'regression', NUM_TRIALS=30)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
" --- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
" Loading dataset from file...\n", | |||
"\n", | |||
" Calculating kernel matrix, this could take a while...\n", | |||
"--- shortest path kernel matrix of size 185 built in 13.3865065574646 seconds ---\n", | |||
"[[ 3. 1. 3. ... 1. 1. 1.]\n", | |||
" [ 1. 6. 1. ... 0. 0. 3.]\n", | |||
" [ 3. 1. 3. ... 1. 1. 1.]\n", | |||
" ...\n", | |||
" [ 1. 0. 1. ... 55. 21. 7.]\n", | |||
" [ 1. 0. 1. ... 21. 55. 7.]\n", | |||
" [ 1. 3. 1. ... 7. 7. 55.]]\n", | |||
"\n", | |||
" Starting calculate accuracy/rmse...\n", | |||
"calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 757.54it/s]\n", | |||
" Mean performance on train set: 28.360361\n", | |||
"With standard deviation: 1.357183\n", | |||
"\n", | |||
" Mean performance on test set: 35.191954\n", | |||
"With standard deviation: 4.495767\n", | |||
"calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 771.22it/s]\n", | |||
"\n", | |||
"\n", | |||
" rmse_test std_test rmse_train std_train k_time\n", | |||
"----------- ---------- ------------ ----------- --------\n", | |||
" 35.192 4.49577 28.3604 1.35718 13.3865\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"%load_ext line_profiler\n", | |||
"\n", | |||
"import sys\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"from pygraph.utils.utils import kernel_train_test\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"\n", | |||
"datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", | |||
"kernel_file_path = 'kernelmatrices_path_acyclic/'\n", | |||
"\n", | |||
"kernel_para = dict(edge_weight = 'atom')\n", | |||
"\n", | |||
"kernel_train_test(datafile, kernel_file_path, spkernel, kernel_para, normalize = False)\n", | |||
"\n", | |||
"# %lprun -f spkernel \\\n", | |||
"# kernel_train_test(datafile, kernel_file_path, spkernel, kernel_para, normalize = False)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# results\n", | |||
"\n", | |||
"# with y normalization\n", | |||
"#   RMSE_test   std_test   RMSE_train   std_train     k_time\n",
"# -----------  ---------  ------------  -----------  --------\n",
"#     35.6337    5.23183      32.3805     3.92531    14.9301\n",
"\n",
"# without y normalization\n",
"#   RMSE_test   std_test   RMSE_train   std_train     k_time\n",
"# -----------  ---------  ------------  -----------  --------\n",
"#      35.192    4.49577      28.3604     1.35718    14.5768"
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 5, | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"- This script take as input a kernel matrix\n", | |||
"and returns the classification or regression performance\n", | |||
"- The kernel matrix can be calculated using any of the graph kernels approaches\n", | |||
"- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", | |||
"- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", | |||
"then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", | |||
"provide the corresponding performance on the test set. If more than one split is performed, the final results \n", | |||
"correspond to the average of the performances on the test sets. \n", | |||
"\n", | |||
"@references\n", | |||
" https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", | |||
"\n", | |||
"\n", | |||
" Loading dataset from file...\n", | |||
"[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n", | |||
" 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n", | |||
" 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n", | |||
" 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n", | |||
" 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n", | |||
" 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n", | |||
" 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n", | |||
" 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n", | |||
" 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n", | |||
" 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n", | |||
" 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n", | |||
" 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n", | |||
" 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n", | |||
" 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n", | |||
" 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n", | |||
" 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n", | |||
" 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n", | |||
" 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n", | |||
"\n", | |||
" Loading the matrix from file...\n", | |||
"[[ 3. 1. 3. ..., 1. 1. 1.]\n", | |||
" [ 1. 6. 1. ..., 0. 0. 3.]\n", | |||
" [ 3. 1. 3. ..., 1. 1. 1.]\n", | |||
" ..., \n", | |||
" [ 1. 0. 1. ..., 55. 21. 7.]\n", | |||
" [ 1. 0. 1. ..., 21. 55. 7.]\n", | |||
" [ 1. 3. 1. ..., 7. 7. 55.]]\n", | |||
"\n", | |||
" --- This is a regression problem ---\n", | |||
"\n", | |||
" Starting split 10...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 40.422382\n", | |||
"The corresponding performance on test set is: 47.424532\n", | |||
"\n", | |||
" Starting split 11...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 33.084913\n", | |||
"The corresponding performance on test set is: 35.493699\n", | |||
"\n", | |||
" Starting split 12...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 31.306710\n", | |||
"The corresponding performance on test set is: 33.173366\n", | |||
"\n", | |||
" Starting split 13...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 43.500424\n", | |||
"The corresponding performance on test set is: 32.633129\n", | |||
"\n", | |||
" Starting split 14...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 10 with parameter alpha = 1.000000\n", | |||
"The best performance on the validation set is: 53.561752\n", | |||
"The corresponding performance on test set is: 42.883548\n", | |||
"\n", | |||
" Starting split 15...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 40.444773\n", | |||
"The corresponding performance on test set is: 32.713040\n", | |||
"\n", | |||
" Starting split 16...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 11 with parameter alpha = 10.000000\n", | |||
"The best performance on the validation set is: 37.046818\n", | |||
"The corresponding performance on test set is: 37.337851\n", | |||
"\n", | |||
" Starting split 17...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 39.907628\n", | |||
"The corresponding performance on test set is: 38.889064\n", | |||
"\n", | |||
" Starting split 18...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 12 with parameter alpha = 100.000000\n", | |||
"The best performance on the validation set is: 29.879950\n", | |||
"The corresponding performance on test set is: 27.652558\n", | |||
"\n", | |||
" Starting split 19...\n", | |||
"\n", | |||
" Normalizing output y...\n", | |||
"The best performance is for trial 11 with parameter alpha = 10.000000\n", | |||
"The best performance on the validation set is: 44.911892\n", | |||
"The corresponding performance on test set is: 35.804454\n", | |||
"\n", | |||
" Mean performance on val set: 39.406724\n", | |||
"With standard deviation: 6.720820\n", | |||
"\n", | |||
" Mean performance on test set: 36.400524\n", | |||
"With standard deviation: 5.352940\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# Author: Elisabetta Ghisu\n", | |||
"\n", | |||
"\"\"\"\n", | |||
"- This script take as input a kernel matrix\n", | |||
"and returns the classification or regression performance\n", | |||
"- The kernel matrix can be calculated using any of the graph kernels approaches\n", | |||
"- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n", | |||
"- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n", | |||
"then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n", | |||
"provide the corresponding performance on the test set. If more than one split is performed, the final results \n", | |||
"correspond to the average of the performances on the test sets. \n", | |||
"\n", | |||
"@references\n", | |||
" https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n", | |||
"\"\"\"\n", | |||
"\n", | |||
"print(__doc__)\n", | |||
"\n", | |||
"import sys\n", | |||
"import pathlib\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"from tabulate import tabulate\n", | |||
"\n", | |||
"import random\n", | |||
"import numpy as np\n", | |||
"import matplotlib.pyplot as plt\n", | |||
"\n", | |||
"from sklearn.kernel_ridge import KernelRidge # 0.17\n", | |||
"from sklearn.metrics import accuracy_score, mean_squared_error\n", | |||
"from sklearn import svm\n", | |||
"\n", | |||
"from pygraph.kernels.spkernel import spkernel\n", | |||
"from pygraph.utils.graphfiles import loadDataset\n", | |||
"\n", | |||
"print('\\n Loading dataset from file...')\n", | |||
"dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n", | |||
"y = np.array(y)\n", | |||
"print(y)\n", | |||
"\n", | |||
"kernel_file_path = 'kernelmatrix.ds'\n", | |||
"path = pathlib.Path(kernel_file_path)\n", | |||
"if path.is_file():\n", | |||
" print('\\n Loading the matrix from file...')\n", | |||
" Kmatrix = np.loadtxt(kernel_file_path)\n", | |||
" print(Kmatrix)\n", | |||
"else:\n", | |||
" print('\\n Calculating kernel matrix, this could take a while...')\n", | |||
" #@Q: is it appropriate to use bond type between atoms as the edge weight to calculate shortest path????????\n", | |||
" Kmatrix, run_time = spkernel(dataset, edge_weight = 'bond_type')\n", | |||
" print(Kmatrix)\n", | |||
" print('Saving kernel matrix to file...')\n", | |||
" np.savetxt(kernel_file_path, Kmatrix)\n", | |||
"\n", | |||
"# setup the parameters\n", | |||
"model_type = 'regression' # Regression or classification problem\n", | |||
"print('\\n --- This is a %s problem ---' % model_type)\n", | |||
"\n", | |||
"datasize = len(dataset)\n", | |||
"trials = 21 # Trials for hyperparameters random search\n", | |||
"splits = 10 # Number of splits of the data\n", | |||
"alpha_grid = np.logspace(-10, 10, num = trials, base = 10) # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n", | |||
"C_grid = np.logspace(-10, 10, num = trials, base = 10)\n", | |||
"random.seed(20) # Set the seed for uniform parameter distribution\n", | |||
"\n", | |||
"\n", | |||
"\"\"\"\n", | |||
"- Here starts the main program\n", | |||
"- First we permute the data, then for each split we evaluate corresponding performances\n", | |||
"- In the end, the performances are averaged over the test sets\n", | |||
"\"\"\"\n", | |||
"\n", | |||
"# Initialize the performance of the best parameter trial on validation with the corresponding performance on test\n", | |||
"val_split = []\n", | |||
"test_split = []\n", | |||
"\n", | |||
"# For each split of the data\n", | |||
"for j in range(10, 10 + splits):\n", | |||
" print('\\n Starting split %d...' % j)\n", | |||
"\n", | |||
" # Set the random set for data permutation\n", | |||
" random_state = int(j)\n", | |||
" np.random.seed(random_state)\n", | |||
" idx_perm = np.random.permutation(datasize)\n", | |||
"# print(idx_perm)\n", | |||
" \n", | |||
" # Permute the data\n", | |||
" y_perm = y[idx_perm] # targets permutation\n", | |||
"# print(y_perm)\n", | |||
" Kmatrix_perm = Kmatrix[:, idx_perm] # inputs permutation\n", | |||
"# print(Kmatrix_perm)\n", | |||
" Kmatrix_perm = Kmatrix_perm[idx_perm, :] # inputs permutation\n", | |||
" \n", | |||
" # Set the training, validation and test\n", | |||
" # Note: the percentage can be set up by the user\n", | |||
" num_train_val = int((datasize * 90) / 100) # 90% (of entire dataset) for training and validation\n", | |||
" num_test = datasize - num_train_val # 10% (of entire dataset) for test\n", | |||
" num_train = int((num_train_val * 90) / 100) # 90% (of train + val) for training\n", | |||
" num_val = num_train_val - num_train # 10% (of train + val) for validation\n", | |||
" \n", | |||
" # Split the kernel matrix\n", | |||
" Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n", | |||
" Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n", | |||
" Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n", | |||
"\n", | |||
" # Split the targets\n", | |||
" y_train = y_perm[0:num_train]\n", | |||
"\n", | |||
" # Normalization step (for real valued targets only)\n", | |||
" print('\\n Normalizing output y...')\n", | |||
" if model_type == 'regression':\n", | |||
" y_train_mean = np.mean(y_train)\n", | |||
" y_train_std = np.std(y_train)\n", | |||
" y_train = (y_train - y_train_mean) / float(y_train_std)\n", | |||
"# print(y)\n", | |||
" \n", | |||
" y_val = y_perm[num_train:(num_train + num_val)]\n", | |||
" y_test = y_perm[(num_train + num_val):datasize]\n", | |||
" \n", | |||
" # Record the performance for each parameter trial respectively on validation and test set\n", | |||
" perf_all_val = []\n", | |||
" perf_all_test = []\n", | |||
" \n", | |||
" # For each parameter trial\n", | |||
" for i in range(trials):\n", | |||
" # For regression use the Kernel Ridge method\n", | |||
" if model_type == 'regression':\n", | |||
"# print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n", | |||
"\n", | |||
" # Fit the kernel ridge model\n", | |||
" KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n", | |||
"# KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n", | |||
" KR.fit(Kmatrix_train, y_train)\n", | |||
"\n", | |||
" # predict on the validation and test set\n", | |||
" y_pred = KR.predict(Kmatrix_val)\n", | |||
" y_pred_test = KR.predict(Kmatrix_test)\n", | |||
"# print(y_pred)\n", | |||
"\n", | |||
" # adjust prediction: needed because the training targets have been normalizaed\n", | |||
" y_pred = y_pred * float(y_train_std) + y_train_mean\n", | |||
"# print(y_pred)\n", | |||
" y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n", | |||
"# print(y_pred_test)\n", | |||
"\n", | |||
" # root mean squared error on validation\n", | |||
" rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", | |||
" perf_all_val.append(rmse)\n", | |||
"\n", | |||
" # root mean squared error in test \n", | |||
" rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", | |||
" perf_all_test.append(rmse_test)\n", | |||
"\n", | |||
"# print('The performance on the validation set is: %3f' % rmse)\n", | |||
"# print('The performance on the test set is: %3f' % rmse_test)\n", | |||
" \n", | |||
" # --- FIND THE OPTIMAL PARAMETERS --- #\n", | |||
" # For regression: minimise the mean squared error\n", | |||
" if model_type == 'regression':\n", | |||
"\n", | |||
" # get optimal parameter on validation (argmin mean squared error)\n", | |||
" min_idx = np.argmin(perf_all_test)\n", | |||
" alpha_opt = alpha_grid[min_idx]\n", | |||
"\n", | |||
" # performance corresponding to optimal parameter on val\n", | |||
" perf_val_opt = perf_all_val[min_idx]\n", | |||
"\n", | |||
" # corresponding performance on test for the same parameter\n", | |||
" perf_test_opt = perf_all_test[min_idx]\n", | |||
"\n", | |||
" print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n", | |||
" print('The best performance on the validation set is: %3f' % perf_val_opt)\n", | |||
" print('The corresponding performance on test set is: %3f' % perf_test_opt)\n", | |||
"\n", | |||
" # append the best performance on validation\n", | |||
" # at the current split\n", | |||
" val_split.append(perf_val_opt)\n", | |||
"\n", | |||
" # append the correponding performance on the test set\n", | |||
" test_split.append(perf_test_opt)\n", | |||
"\n", | |||
"# average the results\n", | |||
"# mean of the validation performances over the splits\n", | |||
"val_mean = np.mean(np.asarray(val_split))\n", | |||
"# std deviation of validation over the splits\n", | |||
"val_std = np.std(np.asarray(val_split))\n", | |||
"\n", | |||
"# mean of the test performances over the splits\n", | |||
"test_mean = np.mean(np.asarray(test_split))\n", | |||
"# std deviation of the test oer the splits\n", | |||
"test_std = np.std(np.asarray(test_split))\n", | |||
"\n", | |||
"print('\\n Mean performance on val set: %3f' % val_mean)\n", | |||
"print('With standard deviation: %3f' % val_std)\n", | |||
"print('\\n Mean performance on test set: %3f' % test_mean)\n", | |||
"print('With standard deviation: %3f' % test_std)" | |||
] | |||
} | |||
], | |||
@@ -749,7 +285,7 @@ | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.6" | |||
"version": "3.6.7" | |||
} | |||
}, | |||
"nbformat": 4, | |||
@@ -2,7 +2,7 @@ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"execution_count": null, | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
@@ -12,9 +12,9 @@ | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"MAO\n", | |||
"Acyclic\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
@@ -23,162 +23,47 @@ | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting shortest paths: 68it [00:00, 629.46it/s]\n", | |||
"calculating kernels: 2346it [00:22, 102.31it/s]\n", | |||
"getting shortest paths: 183it [00:00, 5316.42it/s]\n", | |||
"calculating kernels: 16836it [00:03, 4625.84it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 183 built in 3.8611345291137695 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 68 built in 23.390946626663208 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'edge_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 0%| | 0/30 [00:00<?, ?it/s]0 0\n", | |||
"params_in: {'C': 1e-10}\n", | |||
"0 1\n", | |||
"params_in: {'C': 3.1622776601683795e-10}\n", | |||
"0 2\n", | |||
"params_in: {'C': 1e-09}\n", | |||
"0 3\n", | |||
"params_in: {'C': 3.1622776601683795e-09}\n", | |||
"0 4\n", | |||
"params_in: {'C': 1e-08}\n", | |||
"0 5\n", | |||
"params_in: {'C': 3.162277660168379e-08}\n", | |||
"0 6\n", | |||
"params_in: {'C': 1e-07}\n", | |||
"0 7\n", | |||
"params_in: {'C': 3.162277660168379e-07}\n", | |||
"0 8\n", | |||
"params_in: {'C': 1e-06}\n", | |||
"0 9\n", | |||
"params_in: {'C': 3.162277660168379e-06}\n", | |||
"0 10\n", | |||
"params_in: {'C': 1e-05}\n", | |||
"0 11\n", | |||
"params_in: {'C': 3.1622776601683795e-05}\n", | |||
"0 12\n", | |||
"params_in: {'C': 0.0001}\n", | |||
"0 13\n", | |||
"params_in: {'C': 0.00031622776601683794}\n", | |||
"0 14\n", | |||
"params_in: {'C': 0.001}\n", | |||
"0 15\n", | |||
"params_in: {'C': 0.0031622776601683794}\n", | |||
"0 16\n", | |||
"params_in: {'C': 0.01}\n", | |||
"0 17\n", | |||
"params_in: {'C': 0.03162277660168379}\n", | |||
"0 18\n", | |||
"params_in: {'C': 0.1}\n", | |||
"0 19\n", | |||
"params_in: {'C': 0.31622776601683794}\n", | |||
"0 20\n", | |||
"params_in: {'C': 1.0}\n", | |||
"0 21\n", | |||
"params_in: {'C': 3.1622776601683795}\n", | |||
"0 22\n", | |||
"params_in: {'C': 10.0}\n", | |||
"0 23\n", | |||
"params_in: {'C': 31.622776601683793}\n", | |||
"0 24\n", | |||
"params_in: {'C': 100.0}\n", | |||
"0 25\n", | |||
"params_in: {'C': 316.22776601683796}\n", | |||
"0 26\n", | |||
"params_in: {'C': 1000.0}\n", | |||
"0 27\n", | |||
"params_in: {'C': 3162.2776601683795}\n", | |||
"0 28\n", | |||
"params_in: {'C': 10000.0}\n", | |||
"0 29\n", | |||
"params_in: {'C': 31622.776601683792}\n", | |||
"0 30\n", | |||
"params_in: {'C': 100000.0}\n", | |||
"0 31\n", | |||
"params_in: {'C': 316227.7660168379}\n", | |||
"0 32\n", | |||
"params_in: {'C': 1000000.0}\n", | |||
"0 33\n", | |||
"params_in: {'C': 3162277.6601683795}\n", | |||
"0 34\n", | |||
"params_in: {'C': 10000000.0}\n", | |||
"0 35\n", | |||
"params_in: {'C': 31622776.60168379}\n", | |||
"0 36\n", | |||
"params_in: {'C': 100000000.0}\n", | |||
"0 37\n", | |||
"params_in: {'C': 316227766.01683795}\n", | |||
"0 38\n", | |||
"params_in: {'C': 1000000000.0}\n", | |||
"0 39\n", | |||
"params_in: {'C': 3162277660.1683793}\n", | |||
"0 40\n", | |||
"params_in: {'C': 10000000000.0}\n", | |||
"val_pref: [[0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.55952381 0.71666667 0.81666667 0.81666667\n", | |||
" 0.83571429 0.86666667 0.9 0.9 0.9 0.9\n", | |||
" 0.9 0.9 0.9 0.9 0.9 0.9\n", | |||
" 0.9 0.9 0.9 0.9 0.9 ]]\n", | |||
"test_pref: [[0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.61428571 0.84285714 0.84285714 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286]]\n", | |||
"cross validation: 100%|██████████| 30/30 [00:11<00:00, 2.75it/s]\n", | |||
"\n", | |||
"cross validation: 30it [00:03, 8.71it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"val_pref: [0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714 0.59285714\n", | |||
" 0.59285714 0.59285714 0.55952381 0.71666667 0.81666667 0.81666667\n", | |||
" 0.83571429 0.86666667 0.9 0.9 0.9 0.9\n", | |||
" 0.9 0.9 0.9 0.9 0.9 0.9\n", | |||
" 0.9 0.9 0.9 0.9 0.9 ]\n", | |||
"test_pref: [0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429 0.28571429\n", | |||
" 0.28571429 0.28571429 0.61428571 0.84285714 0.84285714 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286\n", | |||
" 0.85714286 0.85714286 0.85714286 0.85714286 0.85714286]\n", | |||
"average_val_scores: [[0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n", | |||
" 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n", | |||
" 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587 0.55301587\n", | |||
" 0.55301587 0.55468254 0.61507937 0.71777778 0.78039683 0.80531746\n", | |||
" 0.86198413 0.89531746 0.89420635 0.87190476 0.85761905 0.85761905\n", | |||
" 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905\n", | |||
" 0.85761905 0.85761905 0.85761905 0.85761905 0.85761905]]\n", | |||
"best_val_perf: 0.8953174603174604\n", | |||
"\n", | |||
"best_params_out: [{'edge_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f90ea71dae8>, 'nsymb': <function gaussiankernel at 0x7f90ea71d620>, 'mix': functools.partial(<function kernelproduct at 0x7f90ea71d6a8>, <function deltakernel at 0x7f90ea71dae8>, <function gaussiankernel at 0x7f90ea71d620>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'C': 316.22776601683796}]\n", | |||
"\n", | |||
"best_val_perf: 0.8953174603174604\n", | |||
"best_val_std: 0.029090007386146643\n", | |||
"(array([0]), array([25]))\n", | |||
"[0]\n", | |||
"[[0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n", | |||
" 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n", | |||
" 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619 0.5047619\n", | |||
" 0.5047619 0.49761905 0.66 0.75857143 0.78857143 0.82857143\n", | |||
" 0.85285714 0.86380952 0.84428571 0.82190476 0.81571429 0.81571429\n", | |||
" 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429\n", | |||
" 0.81571429 0.81571429 0.81571429 0.81571429 0.81571429]]\n", | |||
"final_performance: [0.8638095238095236]\n", | |||
"final_confidence: [0.10509426306201483]\n", | |||
"train_performance: [0.9857934904601572]\n", | |||
"train_std: [0.00730576290039335]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 23.39±nans\n", | |||
"time to calculate best gram matrix: 23.39±nans\n", | |||
"total training time with all hyper-param choices: 34.88s\n", | |||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 0.0031622776601683794}]\n", | |||
"\n", | |||
"best_val_perf: 12.673707811197355\n", | |||
"best_val_std: 0.8773195213759171\n", | |||
"final_performance: [12.972668262063593]\n", | |||
"final_confidence: [3.7642237202379087]\n", | |||
"train_performance: [3.934708519599526]\n", | |||
"train_std: [0.16225809646161615]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 3.86±nans\n", | |||
"time to calculate best gram matrix: 3.86±nans\n", | |||
"total training time with all hyper-param choices: 7.74s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"Alkane\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n" | |||
] | |||
}, | |||
@@ -191,17 +76,112 @@ | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||
" ret = ret.dtype.type(ret / rcount)\n" | |||
] | |||
}, | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"getting shortest paths: 150it [00:00, 8822.07it/s]\n", | |||
"calculating kernels: 11325it [00:02, 5167.04it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 150 built in 2.394453525543213 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:02, 10.78it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 0.1}]\n", | |||
"\n", | |||
"best_val_perf: 11.082918177885857\n", | |||
"best_val_std: 0.3037589925734673\n", | |||
"final_performance: [7.8261546009779925]\n", | |||
"final_confidence: [1.59375970943081]\n", | |||
"train_performance: [7.988630946761633]\n", | |||
"train_std: [0.16054607648943253]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 2.39±nans\n", | |||
"time to calculate best gram matrix: 2.39±nans\n", | |||
"total training time with all hyper-param choices: 5.49s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"MAO\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting shortest paths: 68it [00:00, 567.53it/s]\n", | |||
"calculating kernels: 2346it [00:14, 161.71it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 68 built in 14.833482265472412 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:02, 13.38it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'C': 1000.0}]\n", | |||
"\n", | |||
"best_val_perf: 0.9084126984126983\n", | |||
"best_val_std: 0.027912022159840448\n", | |||
"final_performance: [0.9085714285714286]\n", | |||
"final_confidence: [0.0879511091875412]\n", | |||
"train_performance: [0.9679438832772166]\n", | |||
"train_std: [0.00754192133247499]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 14.83±nans\n", | |||
"time to calculate best gram matrix: 14.83±nans\n", | |||
"total training time with all hyper-param choices: 17.42s\n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"PAH\n", | |||
"\n", | |||
"--- This is a classification problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting shortest paths: 94it [00:00, 447.28it/s]\n", | |||
"calculating kernels: 4465it [01:04, 68.94it/s] \n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 94 built in 65.20552921295166 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'compute_method': 'naive', 'edge_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'node_kernels': {'symb': <function deltakernel at 0x7f470f0ad268>, 'nsymb': <function gaussiankernel at 0x7f470f0ad2f0>, 'mix': functools.partial(<function kernelproduct at 0x7f470f0ad400>, <function deltakernel at 0x7f470f0ad268>, <function gaussiankernel at 0x7f470f0ad2f0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 0it [00:00, ?it/s]" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"#!/usr/bin/env python3\n", | |||
"# -*- coding: utf-8 -*-\n", | |||
"\"\"\"\n", | |||
"Created on Fri Sep 28 16:37:29 2018\n", | |||
"\n", | |||
"@author: ljia\n", | |||
"\"\"\"\n", | |||
"\n", | |||
"import functools\n", | |||
"from libs import *\n", | |||
"import multiprocessing\n", | |||
@@ -210,19 +190,19 @@ | |||
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
"\n", | |||
"dslist = [\n", | |||
"# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
"# 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
"# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
"# # contains single node graph, node symb\n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',\n", | |||
" 'task': 'regression'}, # node symb\n", | |||
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
" # contains single node graph, node symb\n", | |||
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
"# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
"# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
"# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" # node nsymb\n", | |||
"# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
"# # node symb/nsymb\n", | |||
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
" # node symb/nsymb\n", | |||
"# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
"# # node/edge symb\n", | |||
"# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
@@ -260,7 +240,8 @@ | |||
"param_grid_precomputed = {'node_kernels': \n", | |||
" [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],\n", | |||
" 'edge_kernels': \n", | |||
" [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
" [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],\n", | |||
" 'compute_method': ['naive']}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
"\n", | |||
@@ -300,7 +281,7 @@ | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.6" | |||
"version": "3.6.7" | |||
} | |||
}, | |||
"nbformat": 4, | |||
@@ -14,17 +14,17 @@ from pygraph.kernels.structuralspKernel import structuralspkernel | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
# 'task': 'regression'}, # node symb | |||
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
'task': 'regression'}, # node symb | |||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# contains single node graph, node symb | |||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
@@ -0,0 +1,821 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"\n", | |||
"Acyclic\n", | |||
"\n", | |||
"--- This is a regression problem ---\n", | |||
"\n", | |||
"\n", | |||
"1. Loading dataset from file...\n", | |||
"\n", | |||
"2. Calculating gram matrices. This could take a while...\n", | |||
"\n", | |||
" None edge weight specified. Set all weight to 1.\n", | |||
"\n", | |||
"getting sp graphs: 183it [00:00, 1871.37it/s]\n", | |||
"calculating kernels: 16836it [00:16, 1014.42it/s]\n", | |||
"\n", | |||
" --- shortest path kernel matrix of size 183 built in 16.947543382644653 seconds ---\n", | |||
"\n", | |||
"the gram matrix with parameters {'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8} is: \n", | |||
"\n", | |||
"\n", | |||
"\n", | |||
"1 gram matrices are calculated, 0 of which are ignored.\n", | |||
"\n", | |||
"3. Fitting and predicting using nested cross validation. This could really take a while...\n", | |||
"cross validation: 30it [00:12, 2.03it/s]\n", | |||
"\n", | |||
"4. Getting final performance...\n", | |||
"best_params_out: [{'node_kernels': {'symb': <function deltakernel at 0x7f3a99093950>, 'nsymb': <function gaussiankernel at 0x7f3a990931e0>, 'mix': functools.partial(<function kernelproduct at 0x7f3a99088ae8>, <function deltakernel at 0x7f3a99093950>, <function gaussiankernel at 0x7f3a990931e0>)}, 'n_jobs': 8}]\n", | |||
"best_params_in: [{'alpha': 1e-06}]\n", | |||
"\n", | |||
"best_val_perf: 9.55244065682399\n", | |||
"best_val_std: 0.5574811966683159\n", | |||
"final_performance: [9.724426192585643]\n", | |||
"final_confidence: [2.999822095078807]\n", | |||
"train_performance: [6.141755071354953]\n", | |||
"train_std: [0.2732168016478284]\n", | |||
"\n", | |||
"time to calculate gram matrix with different hyper-params: 16.95±nans\n", | |||
"time to calculate best gram matrix: 16.95±nans\n", | |||
"total training time with all hyper-param choices: 32.74s\n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"name": "stderr", | |||
"output_type": "stream", | |||
"text": [ | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:140: RuntimeWarning: Degrees of freedom <= 0 for slice\n", | |||
" keepdims=keepdims)\n", | |||
"/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:132: RuntimeWarning: invalid value encountered in double_scalars\n", | |||
" ret = ret.dtype.type(ret / rcount)\n" | |||
] | |||
}, | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Filename: ../../pygraph/utils/model_selection_precomputed.py\n", | |||
"\n", | |||
"Line # Mem usage Increment Line Contents\n", | |||
"================================================\n", | |||
" 24 115.2 MiB 115.2 MiB @profile\n", | |||
" 25 def model_selection_for_precomputed_kernel(datafile,\n", | |||
" 26 estimator,\n", | |||
" 27 param_grid_precomputed,\n", | |||
" 28 param_grid,\n", | |||
" 29 model_type,\n", | |||
" 30 NUM_TRIALS=30,\n", | |||
" 31 datafile_y=None,\n", | |||
" 32 extra_params=None,\n", | |||
" 33 ds_name='ds-unknown',\n", | |||
" 34 n_jobs=1,\n", | |||
" 35 read_gm_from_file=False):\n", | |||
" 36 \"\"\"Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results.\n", | |||
" 37 \n", | |||
" 38 Parameters\n", | |||
" 39 ----------\n", | |||
" 40 datafile : string\n", | |||
" 41 Path of dataset file.\n", | |||
" 42 estimator : function\n", | |||
" 43 kernel function used to estimate. This function needs to return a gram matrix.\n", | |||
" 44 param_grid_precomputed : dictionary\n", | |||
" 45 Dictionary with names (string) of parameters used to calculate gram matrices as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n", | |||
" 46 param_grid : dictionary\n", | |||
" 47 Dictionary with names (string) of parameters used as penelties as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.\n", | |||
" 48 model_type : string\n", | |||
" 49 Typr of the problem, can be regression or classification.\n", | |||
" 50 NUM_TRIALS : integer\n", | |||
" 51 Number of random trials of outer cv loop. The default is 30.\n", | |||
" 52 datafile_y : string\n", | |||
" 53 Path of file storing y data. This parameter is optional depending on the given dataset file.\n", | |||
" 54 read_gm_from_file : boolean\n", | |||
" 55 Whether gram matrices are loaded from file.\n", | |||
" 56 \n", | |||
" 57 Examples\n", | |||
" 58 --------\n", | |||
" 59 >>> import numpy as np\n", | |||
" 60 >>> import sys\n", | |||
" 61 >>> sys.path.insert(0, \"../\")\n", | |||
" 62 >>> from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel\n", | |||
" 63 >>> from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", | |||
" 64 >>>\n", | |||
" 65 >>> datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", | |||
" 66 >>> estimator = weisfeilerlehmankernel\n", | |||
" 67 >>> param_grid_precomputed = {'height': [0,1,2,3,4,5,6,7,8,9,10], 'base_kernel': ['subtree']}\n", | |||
" 68 >>> param_grid = {\"alpha\": np.logspace(-2, 2, num = 10, base = 10)}\n", | |||
" 69 >>>\n", | |||
" 70 >>> model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, 'regression')\n", | |||
" 71 \"\"\"\n", | |||
" 72 115.2 MiB 0.0 MiB tqdm.monitor_interval = 0\n", | |||
" 73 \n", | |||
" 74 115.2 MiB 0.0 MiB results_dir = '../notebooks/results/' + estimator.__name__\n", | |||
" 75 115.2 MiB 0.0 MiB if not os.path.exists(results_dir):\n", | |||
" 76 os.makedirs(results_dir)\n", | |||
" 77 # a string to save all the results.\n", | |||
" 78 115.2 MiB 0.0 MiB str_fw = '###################### log time: ' + datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\") + '. ######################\\n\\n'\n", | |||
" 79 115.2 MiB 0.0 MiB str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\\n# including gram matrices, serial numbers for gram matrix figures and performance.\\n\\n'\n", | |||
" 80 \n", | |||
" 81 # setup the model type\n", | |||
" 82 115.2 MiB 0.0 MiB model_type = model_type.lower()\n", | |||
" 83 115.2 MiB 0.0 MiB if model_type != 'regression' and model_type != 'classification':\n", | |||
" 84 raise Exception(\n", | |||
" 85 'The model type is incorrect! Please choose from regression or classification.'\n", | |||
" 86 )\n", | |||
" 87 115.2 MiB 0.0 MiB print()\n", | |||
" 88 115.2 MiB 0.0 MiB print('--- This is a %s problem ---' % model_type)\n", | |||
" 89 115.2 MiB 0.0 MiB str_fw += 'This is a %s problem.\\n' % model_type\n", | |||
" 90 \n", | |||
" 91 # calculate gram matrices rather than read them from file.\n", | |||
" 92 115.2 MiB 0.0 MiB if read_gm_from_file == False:\n", | |||
" 93 # Load the dataset\n", | |||
" 94 115.2 MiB 0.0 MiB print()\n", | |||
" 95 115.2 MiB 0.0 MiB print('\\n1. Loading dataset from file...')\n", | |||
" 96 115.2 MiB 0.0 MiB if isinstance(datafile, str):\n", | |||
" 97 115.2 MiB 0.0 MiB dataset, y_all = loadDataset(\n", | |||
" 98 116.3 MiB 1.1 MiB datafile, filename_y=datafile_y, extra_params=extra_params)\n", | |||
" 99 else: # load data directly from variable.\n", | |||
" 100 dataset = datafile\n", | |||
" 101 y_all = datafile_y \n", | |||
" 102 \n", | |||
" 103 # import matplotlib.pyplot as plt\n", | |||
" 104 # import networkx as nx\n", | |||
" 105 # nx.draw_networkx(dataset[30])\n", | |||
" 106 # plt.show()\n", | |||
" 107 \n", | |||
" 108 # Grid of parameters with a discrete number of values for each.\n", | |||
" 109 116.3 MiB 0.0 MiB param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", | |||
" 110 116.3 MiB 0.0 MiB param_list = list(ParameterGrid(param_grid))\n", | |||
" 111 \n", | |||
" 112 116.3 MiB 0.0 MiB gram_matrices = [\n", | |||
" 113 ] # a list to store gram matrices for all param_grid_precomputed\n", | |||
" 114 116.3 MiB 0.0 MiB gram_matrix_time = [\n", | |||
" 115 ] # a list to store time to calculate gram matrices\n", | |||
" 116 116.3 MiB 0.0 MiB param_list_pre_revised = [\n", | |||
" 117 ] # list to store param grids precomputed ignoring the useless ones\n", | |||
" 118 \n", | |||
" 119 # calculate all gram matrices\n", | |||
" 120 116.3 MiB 0.0 MiB print()\n", | |||
" 121 116.3 MiB 0.0 MiB print('2. Calculating gram matrices. This could take a while...')\n", | |||
" 122 116.3 MiB 0.0 MiB str_fw += '\\nII. Gram matrices.\\n\\n'\n", | |||
" 123 116.3 MiB 0.0 MiB tts = time.time() # start training time\n", | |||
" 124 116.3 MiB 0.0 MiB nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN)\n", | |||
" 125 145.3 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed):\n", | |||
" 126 116.3 MiB 0.0 MiB y = y_all[:]\n", | |||
" 127 116.3 MiB 0.0 MiB params_out['n_jobs'] = n_jobs\n", | |||
" 128 # print(dataset)\n", | |||
" 129 # import networkx as nx\n", | |||
" 130 # nx.draw_networkx(dataset[1])\n", | |||
" 131 # plt.show()\n", | |||
" 132 119.5 MiB 3.1 MiB rtn_data = estimator(dataset[:], **params_out)\n", | |||
" 133 119.5 MiB 0.0 MiB Kmatrix = rtn_data[0]\n", | |||
" 134 119.5 MiB 0.0 MiB current_run_time = rtn_data[1]\n", | |||
" 135 # for some kernels, some graphs in datasets may not meet the \n", | |||
" 136 # kernels' requirements for graph structure. These graphs are trimmed. \n", | |||
" 137 119.5 MiB 0.0 MiB if len(rtn_data) == 3:\n", | |||
" 138 119.5 MiB 0.0 MiB idx_trim = rtn_data[2] # the index of trimmed graph list\n", | |||
" 139 119.5 MiB 0.0 MiB y = [y[idxt] for idxt in idx_trim] # trim y accordingly\n", | |||
" 140 # Kmatrix = np.random.rand(2250, 2250)\n", | |||
" 141 # current_run_time = 0.1\n", | |||
" 142 \n", | |||
" 143 # remove graphs whose kernels with themselves are zeros\n", | |||
" 144 119.5 MiB 0.0 MiB Kmatrix_diag = Kmatrix.diagonal().copy()\n", | |||
" 145 119.5 MiB 0.0 MiB nb_g_ignore = 0\n", | |||
" 146 119.5 MiB 0.0 MiB for idxk, diag in enumerate(Kmatrix_diag):\n", | |||
" 147 119.5 MiB 0.0 MiB if diag == 0:\n", | |||
" 148 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=0)\n", | |||
" 149 Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=1)\n", | |||
" 150 nb_g_ignore += 1\n", | |||
" 151 # normalization\n", | |||
" 152 119.5 MiB 0.0 MiB Kmatrix_diag = Kmatrix.diagonal().copy()\n", | |||
" 153 119.5 MiB 0.0 MiB for i in range(len(Kmatrix)):\n", | |||
" 154 119.5 MiB 0.0 MiB for j in range(i, len(Kmatrix)):\n", | |||
" 155 119.5 MiB 0.0 MiB Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])\n", | |||
" 156 119.5 MiB 0.0 MiB Kmatrix[j][i] = Kmatrix[i][j]\n", | |||
" 157 \n", | |||
" 158 119.5 MiB 0.0 MiB print()\n", | |||
" 159 119.5 MiB 0.0 MiB if params_out == {}:\n", | |||
" 160 print('the gram matrix is: ')\n", | |||
" 161 str_fw += 'the gram matrix is:\\n\\n'\n", | |||
" 162 else:\n", | |||
" 163 119.5 MiB 0.0 MiB print('the gram matrix with parameters', params_out, 'is: \\n\\n')\n", | |||
" 164 119.5 MiB 0.0 MiB str_fw += 'the gram matrix with parameters %s is:\\n\\n' % params_out\n", | |||
" 165 119.5 MiB 0.0 MiB if len(Kmatrix) < 2:\n", | |||
" 166 nb_gm_ignore += 1\n", | |||
" 167 print('ignored, as at most only one of all its diagonal value is non-zero.')\n", | |||
" 168 str_fw += 'ignored, as at most only one of all its diagonal value is non-zero.\\n\\n'\n", | |||
" 169 else: \n", | |||
" 170 119.5 MiB 0.0 MiB if np.isnan(Kmatrix).any(\n", | |||
" 171 ): # if the matrix contains elements that are not numbers\n", | |||
" 172 nb_gm_ignore += 1\n", | |||
" 173 print('ignored, as it contains elements that are not numbers.')\n", | |||
" 174 str_fw += 'ignored, as it contains elements that are not numbers.\\n\\n'\n", | |||
" 175 else:\n", | |||
" 176 # print(Kmatrix)\n", | |||
" 177 119.5 MiB 0.0 MiB str_fw += np.array2string(\n", | |||
" 178 119.5 MiB 0.0 MiB Kmatrix,\n", | |||
" 179 119.5 MiB 0.0 MiB separator=',') + '\\n\\n'\n", | |||
" 180 # separator=',',\n", | |||
" 181 # threshold=np.inf,\n", | |||
" 182 # floatmode='unique') + '\\n\\n'\n", | |||
" 183 \n", | |||
" 184 119.5 MiB 0.0 MiB fig_file_name = results_dir + '/GM[ds]' + ds_name\n", | |||
" 185 119.5 MiB 0.0 MiB if params_out != {}:\n", | |||
" 186 119.5 MiB 0.0 MiB fig_file_name += '[params]' + str(idx)\n", | |||
" 187 120.3 MiB 0.7 MiB plt.imshow(Kmatrix)\n", | |||
" 188 120.4 MiB 0.1 MiB plt.colorbar()\n", | |||
" 189 145.3 MiB 24.9 MiB plt.savefig(fig_file_name + '.eps', format='eps', dpi=300)\n", | |||
" 190 # plt.show()\n", | |||
" 191 145.3 MiB 0.0 MiB plt.clf()\n", | |||
" 192 145.3 MiB 0.0 MiB gram_matrices.append(Kmatrix)\n", | |||
" 193 145.3 MiB 0.0 MiB gram_matrix_time.append(current_run_time)\n", | |||
" 194 145.3 MiB 0.0 MiB param_list_pre_revised.append(params_out)\n", | |||
" 195 145.3 MiB 0.0 MiB if nb_g_ignore > 0:\n", | |||
" 196 print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore)\n", | |||
" 197 str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore\n", | |||
" 198 145.3 MiB 0.0 MiB print()\n", | |||
" 199 145.3 MiB 0.0 MiB print(\n", | |||
" 200 145.3 MiB 0.0 MiB '{} gram matrices are calculated, {} of which are ignored.'.format(\n", | |||
" 201 145.3 MiB 0.0 MiB len(param_list_precomputed), nb_gm_ignore))\n", | |||
" 202 145.3 MiB 0.0 MiB str_fw += '{} gram matrices are calculated, {} of which are ignored.\\n\\n'.format(len(param_list_precomputed), nb_gm_ignore)\n", | |||
" 203 145.3 MiB 0.0 MiB str_fw += 'serial numbers of gram matrix figures and their corresponding parameters settings:\\n\\n'\n", | |||
" 204 145.3 MiB 0.0 MiB str_fw += ''.join([\n", | |||
" 205 145.3 MiB 0.0 MiB '{}: {}\\n'.format(idx, params_out)\n", | |||
" 206 145.3 MiB 0.0 MiB for idx, params_out in enumerate(param_list_precomputed)\n", | |||
" 207 ])\n", | |||
" 208 \n", | |||
" 209 145.3 MiB 0.0 MiB print()\n", | |||
" 210 145.3 MiB 0.0 MiB if len(gram_matrices) == 0:\n", | |||
" 211 print('all gram matrices are ignored, no results obtained.')\n", | |||
" 212 str_fw += '\\nall gram matrices are ignored, no results obtained.\\n\\n'\n", | |||
" 213 else:\n", | |||
" 214 # save gram matrices to file.\n", | |||
" 215 145.4 MiB 0.1 MiB np.savez(results_dir + '/' + ds_name + '.gm', \n", | |||
" 216 145.4 MiB 0.0 MiB gms=gram_matrices, params=param_list_pre_revised, y=y, \n", | |||
" 217 145.4 MiB 0.0 MiB gmtime=gram_matrix_time)\n", | |||
" 218 \n", | |||
" 219 145.4 MiB 0.0 MiB print(\n", | |||
" 220 145.4 MiB 0.0 MiB '3. Fitting and predicting using nested cross validation. This could really take a while...'\n", | |||
" 221 )\n", | |||
" 222 \n", | |||
" 223 # ---- use pool.imap_unordered to parallel and track progress. ----\n", | |||
" 224 # train_pref = []\n", | |||
" 225 # val_pref = []\n", | |||
" 226 # test_pref = []\n", | |||
" 227 # def func_assign(result, var_to_assign):\n", | |||
" 228 # for idx, itm in enumerate(var_to_assign):\n", | |||
" 229 # itm.append(result[idx]) \n", | |||
" 230 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 231 # \n", | |||
" 232 # parallel_me(trial_do_partial, range(NUM_TRIALS), func_assign, \n", | |||
" 233 # [train_pref, val_pref, test_pref], glbv=gram_matrices,\n", | |||
" 234 # method='imap_unordered', n_jobs=n_jobs, chunksize=1,\n", | |||
" 235 # itr_desc='cross validation')\n", | |||
" 236 \n", | |||
" 237 145.4 MiB 0.0 MiB def init_worker(gms_toshare):\n", | |||
" 238 global G_gms\n", | |||
" 239 G_gms = gms_toshare\n", | |||
" 240 \n", | |||
" 241 # gram_matrices = np.array(gram_matrices)\n", | |||
" 242 # gms_shape = gram_matrices.shape\n", | |||
" 243 # gms_array = Array('d', np.reshape(gram_matrices.copy(), -1, order='C'))\n", | |||
" 244 # pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gms_array, gms_shape))\n", | |||
" 245 145.4 MiB 0.0 MiB pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n", | |||
" 246 145.4 MiB 0.0 MiB trial_do_partial = partial(parallel_trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 247 145.4 MiB 0.0 MiB train_pref = []\n", | |||
" 248 145.4 MiB 0.0 MiB val_pref = []\n", | |||
" 249 145.4 MiB 0.0 MiB test_pref = []\n", | |||
" 250 # if NUM_TRIALS < 1000 * n_jobs:\n", | |||
" 251 # chunksize = int(NUM_TRIALS / n_jobs) + 1\n", | |||
" 252 # else:\n", | |||
" 253 # chunksize = 1000\n", | |||
" 254 145.4 MiB 0.0 MiB chunksize = 1\n", | |||
" 255 145.4 MiB 0.0 MiB for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n", | |||
" 256 145.4 MiB 0.0 MiB train_pref.append(o1)\n", | |||
" 257 145.4 MiB 0.0 MiB val_pref.append(o2)\n", | |||
" 258 145.4 MiB 0.0 MiB test_pref.append(o3)\n", | |||
" 259 145.4 MiB 0.0 MiB pool.close()\n", | |||
" 260 145.4 MiB 0.0 MiB pool.join()\n", | |||
" 261 \n", | |||
" 262 # # ---- use pool.map to parallel. ----\n", | |||
" 263 # pool = Pool(n_jobs)\n", | |||
" 264 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y[0:250], model_type)\n", | |||
" 265 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n", | |||
" 266 # train_pref = [item[0] for item in result_perf]\n", | |||
" 267 # val_pref = [item[1] for item in result_perf]\n", | |||
" 268 # test_pref = [item[2] for item in result_perf]\n", | |||
" 269 \n", | |||
" 270 # # ---- direct running, normally use a single CPU core. ----\n", | |||
" 271 # train_pref = []\n", | |||
" 272 # val_pref = []\n", | |||
" 273 # test_pref = []\n", | |||
" 274 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n", | |||
" 275 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n", | |||
" 276 # train_pref.append(o1)\n", | |||
" 277 # val_pref.append(o2)\n", | |||
" 278 # test_pref.append(o3)\n", | |||
" 279 # print()\n", | |||
" 280 \n", | |||
" 281 145.4 MiB 0.0 MiB print()\n", | |||
" 282 145.4 MiB 0.0 MiB print('4. Getting final performance...')\n", | |||
" 283 145.4 MiB 0.0 MiB str_fw += '\\nIII. Performance.\\n\\n'\n", | |||
" 284 # averages and confidences of performances on outer trials for each combination of parameters\n", | |||
" 285 145.4 MiB 0.0 MiB average_train_scores = np.mean(train_pref, axis=0)\n", | |||
" 286 # print('val_pref: ', val_pref[0][0])\n", | |||
" 287 145.4 MiB 0.0 MiB average_val_scores = np.mean(val_pref, axis=0)\n", | |||
" 288 # print('test_pref: ', test_pref[0][0])\n", | |||
" 289 145.4 MiB 0.0 MiB average_perf_scores = np.mean(test_pref, axis=0)\n", | |||
" 290 # sample std is used here\n", | |||
" 291 145.4 MiB 0.0 MiB std_train_scores = np.std(train_pref, axis=0, ddof=1)\n", | |||
" 292 145.4 MiB 0.0 MiB std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", | |||
" 293 145.4 MiB 0.0 MiB std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", | |||
" 294 \n", | |||
" 295 145.4 MiB 0.0 MiB if model_type == 'regression':\n", | |||
" 296 145.4 MiB 0.0 MiB best_val_perf = np.amin(average_val_scores)\n", | |||
" 297 else:\n", | |||
" 298 best_val_perf = np.amax(average_val_scores)\n", | |||
" 299 # print('average_val_scores: ', average_val_scores)\n", | |||
" 300 # print('best_val_perf: ', best_val_perf)\n", | |||
" 301 # print()\n", | |||
" 302 145.4 MiB 0.0 MiB best_params_index = np.where(average_val_scores == best_val_perf)\n", | |||
" 303 # find smallest val std with best val perf.\n", | |||
" 304 best_val_stds = [\n", | |||
" 305 145.4 MiB 0.0 MiB std_val_scores[value][best_params_index[1][idx]]\n", | |||
" 306 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 307 ]\n", | |||
" 308 145.4 MiB 0.0 MiB min_val_std = np.amin(best_val_stds)\n", | |||
" 309 145.4 MiB 0.0 MiB best_params_index = np.where(std_val_scores == min_val_std)\n", | |||
" 310 best_params_out = [\n", | |||
" 311 145.4 MiB 0.0 MiB param_list_pre_revised[i] for i in best_params_index[0]\n", | |||
" 312 ]\n", | |||
" 313 145.4 MiB 0.0 MiB best_params_in = [param_list[i] for i in best_params_index[1]]\n", | |||
" 314 145.4 MiB 0.0 MiB print('best_params_out: ', best_params_out)\n", | |||
" 315 145.4 MiB 0.0 MiB print('best_params_in: ', best_params_in)\n", | |||
" 316 145.4 MiB 0.0 MiB print()\n", | |||
" 317 145.4 MiB 0.0 MiB print('best_val_perf: ', best_val_perf)\n", | |||
" 318 145.4 MiB 0.0 MiB print('best_val_std: ', min_val_std)\n", | |||
" 319 145.4 MiB 0.0 MiB str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n", | |||
" 320 145.4 MiB 0.0 MiB str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n", | |||
" 321 145.4 MiB 0.0 MiB str_fw += 'best_val_perf: %s\\n' % best_val_perf\n", | |||
" 322 145.4 MiB 0.0 MiB str_fw += 'best_val_std: %s\\n' % min_val_std\n", | |||
" 323 \n", | |||
" 324 # print(best_params_index)\n", | |||
" 325 # print(best_params_index[0])\n", | |||
" 326 # print(average_perf_scores)\n", | |||
" 327 final_performance = [\n", | |||
" 328 145.4 MiB 0.0 MiB average_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 329 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 330 ]\n", | |||
" 331 final_confidence = [\n", | |||
" 332 145.4 MiB 0.0 MiB std_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 333 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 334 ]\n", | |||
" 335 145.4 MiB 0.0 MiB print('final_performance: ', final_performance)\n", | |||
" 336 145.4 MiB 0.0 MiB print('final_confidence: ', final_confidence)\n", | |||
" 337 145.4 MiB 0.0 MiB str_fw += 'final_performance: %s\\n' % final_performance\n", | |||
" 338 145.4 MiB 0.0 MiB str_fw += 'final_confidence: %s\\n' % final_confidence\n", | |||
" 339 train_performance = [\n", | |||
" 340 145.4 MiB 0.0 MiB average_train_scores[value][best_params_index[1][idx]]\n", | |||
" 341 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 342 ]\n", | |||
" 343 train_std = [\n", | |||
" 344 145.4 MiB 0.0 MiB std_train_scores[value][best_params_index[1][idx]]\n", | |||
" 345 145.4 MiB 0.0 MiB for idx, value in enumerate(best_params_index[0])\n", | |||
" 346 ]\n", | |||
" 347 145.4 MiB 0.0 MiB print('train_performance: %s' % train_performance)\n", | |||
" 348 145.4 MiB 0.0 MiB print('train_std: ', train_std)\n", | |||
" 349 145.4 MiB 0.0 MiB str_fw += 'train_performance: %s\\n' % train_performance\n", | |||
" 350 145.4 MiB 0.0 MiB str_fw += 'train_std: %s\\n\\n' % train_std\n", | |||
" 351 \n", | |||
" 352 145.4 MiB 0.0 MiB print()\n", | |||
" 353 145.4 MiB 0.0 MiB tt_total = time.time() - tts # training time for all hyper-parameters\n", | |||
" 354 145.4 MiB 0.0 MiB average_gram_matrix_time = np.mean(gram_matrix_time)\n", | |||
" 355 145.4 MiB 0.0 MiB std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n", | |||
" 356 best_gram_matrix_time = [\n", | |||
" 357 145.4 MiB 0.0 MiB gram_matrix_time[i] for i in best_params_index[0]\n", | |||
" 358 ]\n", | |||
" 359 145.4 MiB 0.0 MiB ave_bgmt = np.mean(best_gram_matrix_time)\n", | |||
" 360 145.4 MiB 0.0 MiB std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n", | |||
" 361 145.4 MiB 0.0 MiB print(\n", | |||
" 362 145.4 MiB 0.0 MiB 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n", | |||
" 363 145.4 MiB 0.0 MiB .format(average_gram_matrix_time, std_gram_matrix_time))\n", | |||
" 364 145.4 MiB 0.0 MiB print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n", | |||
" 365 145.4 MiB 0.0 MiB ave_bgmt, std_bgmt))\n", | |||
" 366 145.4 MiB 0.0 MiB print(\n", | |||
" 367 145.4 MiB 0.0 MiB 'total training time with all hyper-param choices: {:.2f}s'.format(\n", | |||
" 368 145.4 MiB 0.0 MiB tt_total))\n", | |||
" 369 145.4 MiB 0.0 MiB str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n", | |||
" 370 145.4 MiB 0.0 MiB str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n", | |||
" 371 145.4 MiB 0.0 MiB str_fw += 'total training time with all hyper-param choices: {:.2f}s\\n\\n'.format(tt_total)\n", | |||
" 372 \n", | |||
" 373 # # save results to file\n", | |||
" 374 # np.savetxt(results_name_pre + 'average_train_scores.dt',\n", | |||
" 375 # average_train_scores)\n", | |||
" 376 # np.savetxt(results_name_pre + 'average_val_scores', average_val_scores)\n", | |||
" 377 # np.savetxt(results_name_pre + 'average_perf_scores.dt',\n", | |||
" 378 # average_perf_scores)\n", | |||
" 379 # np.savetxt(results_name_pre + 'std_train_scores.dt', std_train_scores)\n", | |||
" 380 # np.savetxt(results_name_pre + 'std_val_scores.dt', std_val_scores)\n", | |||
" 381 # np.savetxt(results_name_pre + 'std_perf_scores.dt', std_perf_scores)\n", | |||
" 382 \n", | |||
" 383 # np.save(results_name_pre + 'best_params_index', best_params_index)\n", | |||
" 384 # np.save(results_name_pre + 'best_params_pre.dt', best_params_out)\n", | |||
" 385 # np.save(results_name_pre + 'best_params_in.dt', best_params_in)\n", | |||
" 386 # np.save(results_name_pre + 'best_val_perf.dt', best_val_perf)\n", | |||
" 387 # np.save(results_name_pre + 'best_val_std.dt', best_val_std)\n", | |||
" 388 # np.save(results_name_pre + 'final_performance.dt', final_performance)\n", | |||
" 389 # np.save(results_name_pre + 'final_confidence.dt', final_confidence)\n", | |||
" 390 # np.save(results_name_pre + 'train_performance.dt', train_performance)\n", | |||
" 391 # np.save(results_name_pre + 'train_std.dt', train_std)\n", | |||
" 392 \n", | |||
" 393 # np.save(results_name_pre + 'gram_matrix_time.dt', gram_matrix_time)\n", | |||
" 394 # np.save(results_name_pre + 'average_gram_matrix_time.dt',\n", | |||
" 395 # average_gram_matrix_time)\n", | |||
" 396 # np.save(results_name_pre + 'std_gram_matrix_time.dt',\n", | |||
" 397 # std_gram_matrix_time)\n", | |||
" 398 # np.save(results_name_pre + 'best_gram_matrix_time.dt',\n", | |||
" 399 # best_gram_matrix_time)\n", | |||
" 400 \n", | |||
" 401 # print out as table.\n", | |||
" 402 145.4 MiB 0.0 MiB from collections import OrderedDict\n", | |||
" 403 145.4 MiB 0.0 MiB from tabulate import tabulate\n", | |||
" 404 145.4 MiB 0.0 MiB table_dict = {}\n", | |||
" 405 145.4 MiB 0.0 MiB if model_type == 'regression':\n", | |||
" 406 145.6 MiB 0.0 MiB for param_in in param_list:\n", | |||
" 407 145.6 MiB 0.2 MiB param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n", | |||
" 408 else:\n", | |||
" 409 for param_in in param_list:\n", | |||
" 410 param_in['C'] = '{:.2e}'.format(param_in['C'])\n", | |||
" 411 145.6 MiB 0.0 MiB table_dict['params'] = [{**param_out, **param_in}\n", | |||
" 412 145.6 MiB 0.0 MiB for param_in in param_list for param_out in param_list_pre_revised]\n", | |||
" 413 table_dict['gram_matrix_time'] = [\n", | |||
" 414 145.6 MiB 0.0 MiB '{:.2f}'.format(gram_matrix_time[index_out])\n", | |||
" 415 145.6 MiB 0.0 MiB for param_in in param_list\n", | |||
" 416 145.6 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 417 ]\n", | |||
" 418 table_dict['valid_perf'] = [\n", | |||
" 419 145.6 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n", | |||
" 420 std_val_scores[index_out][index_in])\n", | |||
" 421 145.6 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 422 145.6 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 423 ]\n", | |||
" 424 table_dict['test_perf'] = [\n", | |||
" 425 145.6 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n", | |||
" 426 std_perf_scores[index_out][index_in])\n", | |||
" 427 145.6 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 428 145.6 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 429 ]\n", | |||
" 430 table_dict['train_perf'] = [\n", | |||
" 431 145.6 MiB 0.0 MiB '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n", | |||
" 432 std_train_scores[index_out][index_in])\n", | |||
" 433 145.6 MiB 0.0 MiB for index_in, _ in enumerate(param_list)\n", | |||
" 434 145.6 MiB 0.0 MiB for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 435 ]\n", | |||
" 436 keyorder = [\n", | |||
" 437 145.6 MiB 0.0 MiB 'params', 'train_perf', 'valid_perf', 'test_perf',\n", | |||
" 438 145.6 MiB 0.0 MiB 'gram_matrix_time'\n", | |||
" 439 ]\n", | |||
" 440 145.6 MiB 0.0 MiB print()\n", | |||
" 441 145.6 MiB 0.0 MiB tb_print = tabulate(\n", | |||
" 442 145.6 MiB 0.0 MiB OrderedDict(\n", | |||
" 443 145.6 MiB 0.0 MiB sorted(table_dict.items(),\n", | |||
" 444 145.6 MiB 0.0 MiB key=lambda i: keyorder.index(i[0]))),\n", | |||
" 445 145.6 MiB 0.0 MiB headers='keys')\n", | |||
" 446 # print(tb_print)\n", | |||
" 447 145.6 MiB 0.0 MiB str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n", | |||
" 448 \n", | |||
" 449 # read gram matrices from file.\n", | |||
" 450 else: \n", | |||
" 451 # Grid of parameters with a discrete number of values for each.\n", | |||
" 452 # param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", | |||
" 453 param_list = list(ParameterGrid(param_grid))\n", | |||
" 454 \n", | |||
" 455 # read gram matrices from file.\n", | |||
" 456 print()\n", | |||
" 457 print('2. Reading gram matrices from file...')\n", | |||
" 458 str_fw += '\\nII. Gram matrices.\\n\\nGram matrices are read from file, see last log for detail.\\n'\n", | |||
" 459 gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')\n", | |||
" 460 gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed\n", | |||
" 461 gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices\n", | |||
" 462 param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones\n", | |||
" 463 y = gmfile['y'].tolist()\n", | |||
" 464 \n", | |||
" 465 tts = time.time() # start training time\n", | |||
" 466 # nb_gm_ignore = 0 # the number of gram matrices those should not be considered, as they may contain elements that are not numbers (NaN) \n", | |||
" 467 print(\n", | |||
" 468 '3. Fitting and predicting using nested cross validation. This could really take a while...'\n", | |||
" 469 )\n", | |||
" 470 \n", | |||
" 471 # ---- use pool.imap_unordered to parallel and track progress. ----\n", | |||
" 472 def init_worker(gms_toshare):\n", | |||
" 473 global G_gms\n", | |||
" 474 G_gms = gms_toshare\n", | |||
" 475 \n", | |||
" 476 pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(gram_matrices,))\n", | |||
" 477 trial_do_partial = partial(parallel_trial_do, param_list_pre_revised, param_list, y, model_type)\n", | |||
" 478 train_pref = []\n", | |||
" 479 val_pref = []\n", | |||
" 480 test_pref = []\n", | |||
" 481 chunksize = 1\n", | |||
" 482 for o1, o2, o3 in tqdm(pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), chunksize), desc='cross validation', file=sys.stdout):\n", | |||
" 483 train_pref.append(o1)\n", | |||
" 484 val_pref.append(o2)\n", | |||
" 485 test_pref.append(o3)\n", | |||
" 486 pool.close()\n", | |||
" 487 pool.join()\n", | |||
" 488 \n", | |||
" 489 # # ---- use pool.map to parallel. ----\n", | |||
" 490 # result_perf = pool.map(trial_do_partial, range(NUM_TRIALS))\n", | |||
" 491 # train_pref = [item[0] for item in result_perf]\n", | |||
" 492 # val_pref = [item[1] for item in result_perf]\n", | |||
" 493 # test_pref = [item[2] for item in result_perf]\n", | |||
" 494 \n", | |||
" 495 # # ---- use joblib.Parallel to parallel and track progress. ----\n", | |||
" 496 # trial_do_partial = partial(trial_do, param_list_pre_revised, param_list, gram_matrices, y, model_type)\n", | |||
" 497 # result_perf = Parallel(n_jobs=n_jobs, verbose=10)(delayed(trial_do_partial)(trial) for trial in range(NUM_TRIALS))\n", | |||
" 498 # train_pref = [item[0] for item in result_perf]\n", | |||
" 499 # val_pref = [item[1] for item in result_perf]\n", | |||
" 500 # test_pref = [item[2] for item in result_perf]\n", | |||
" 501 \n", | |||
" 502 # # ---- direct running, normally use a single CPU core. ----\n", | |||
" 503 # train_pref = []\n", | |||
" 504 # val_pref = []\n", | |||
" 505 # test_pref = []\n", | |||
" 506 # for i in tqdm(range(NUM_TRIALS), desc='cross validation', file=sys.stdout):\n", | |||
" 507 # o1, o2, o3 = trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, i)\n", | |||
" 508 # train_pref.append(o1)\n", | |||
" 509 # val_pref.append(o2)\n", | |||
" 510 # test_pref.append(o3)\n", | |||
" 511 \n", | |||
" 512 print()\n", | |||
" 513 print('4. Getting final performance...')\n", | |||
" 514 str_fw += '\\nIII. Performance.\\n\\n'\n", | |||
" 515 # averages and confidences of performances on outer trials for each combination of parameters\n", | |||
" 516 average_train_scores = np.mean(train_pref, axis=0)\n", | |||
" 517 average_val_scores = np.mean(val_pref, axis=0)\n", | |||
" 518 average_perf_scores = np.mean(test_pref, axis=0)\n", | |||
" 519 # sample std is used here\n", | |||
" 520 std_train_scores = np.std(train_pref, axis=0, ddof=1)\n", | |||
" 521 std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", | |||
" 522 std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", | |||
" 523 \n", | |||
" 524 if model_type == 'regression':\n", | |||
" 525 best_val_perf = np.amin(average_val_scores)\n", | |||
" 526 else:\n", | |||
" 527 best_val_perf = np.amax(average_val_scores)\n", | |||
" 528 best_params_index = np.where(average_val_scores == best_val_perf)\n", | |||
" 529 # find smallest val std with best val perf.\n", | |||
" 530 best_val_stds = [\n", | |||
" 531 std_val_scores[value][best_params_index[1][idx]]\n", | |||
" 532 for idx, value in enumerate(best_params_index[0])\n", | |||
" 533 ]\n", | |||
" 534 min_val_std = np.amin(best_val_stds)\n", | |||
" 535 best_params_index = np.where(std_val_scores == min_val_std)\n", | |||
" 536 best_params_out = [\n", | |||
" 537 param_list_pre_revised[i] for i in best_params_index[0]\n", | |||
" 538 ]\n", | |||
" 539 best_params_in = [param_list[i] for i in best_params_index[1]]\n", | |||
" 540 print('best_params_out: ', best_params_out)\n", | |||
" 541 print('best_params_in: ', best_params_in)\n", | |||
" 542 print()\n", | |||
" 543 print('best_val_perf: ', best_val_perf)\n", | |||
" 544 print('best_val_std: ', min_val_std)\n", | |||
" 545 str_fw += 'best settings of hyper-params to build gram matrix: %s\\n' % best_params_out\n", | |||
" 546 str_fw += 'best settings of other hyper-params: %s\\n\\n' % best_params_in\n", | |||
" 547 str_fw += 'best_val_perf: %s\\n' % best_val_perf\n", | |||
" 548 str_fw += 'best_val_std: %s\\n' % min_val_std\n", | |||
" 549 \n", | |||
" 550 final_performance = [\n", | |||
" 551 average_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 552 for idx, value in enumerate(best_params_index[0])\n", | |||
" 553 ]\n", | |||
" 554 final_confidence = [\n", | |||
" 555 std_perf_scores[value][best_params_index[1][idx]]\n", | |||
" 556 for idx, value in enumerate(best_params_index[0])\n", | |||
" 557 ]\n", | |||
" 558 print('final_performance: ', final_performance)\n", | |||
" 559 print('final_confidence: ', final_confidence)\n", | |||
" 560 str_fw += 'final_performance: %s\\n' % final_performance\n", | |||
" 561 str_fw += 'final_confidence: %s\\n' % final_confidence\n", | |||
" 562 train_performance = [\n", | |||
" 563 average_train_scores[value][best_params_index[1][idx]]\n", | |||
" 564 for idx, value in enumerate(best_params_index[0])\n", | |||
" 565 ]\n", | |||
" 566 train_std = [\n", | |||
" 567 std_train_scores[value][best_params_index[1][idx]]\n", | |||
" 568 for idx, value in enumerate(best_params_index[0])\n", | |||
" 569 ]\n", | |||
" 570 print('train_performance: %s' % train_performance)\n", | |||
" 571 print('train_std: ', train_std)\n", | |||
" 572 str_fw += 'train_performance: %s\\n' % train_performance\n", | |||
" 573 str_fw += 'train_std: %s\\n\\n' % train_std\n", | |||
" 574 \n", | |||
" 575 print()\n", | |||
" 576 average_gram_matrix_time = np.mean(gram_matrix_time)\n", | |||
" 577 std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)\n", | |||
" 578 best_gram_matrix_time = [\n", | |||
" 579 gram_matrix_time[i] for i in best_params_index[0]\n", | |||
" 580 ]\n", | |||
" 581 ave_bgmt = np.mean(best_gram_matrix_time)\n", | |||
" 582 std_bgmt = np.std(best_gram_matrix_time, ddof=1)\n", | |||
" 583 print(\n", | |||
" 584 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'\n", | |||
" 585 .format(average_gram_matrix_time, std_gram_matrix_time))\n", | |||
" 586 print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(\n", | |||
" 587 ave_bgmt, std_bgmt))\n", | |||
" 588 tt_poster = time.time() - tts # training time with hyper-param choices who did not participate in calculation of gram matrices\n", | |||
" 589 print(\n", | |||
" 590 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s'.format(\n", | |||
" 591 tt_poster))\n", | |||
" 592 print('total training time with all hyper-param choices: {:.2f}s'.format(\n", | |||
" 593 tt_poster + np.sum(gram_matrix_time)))\n", | |||
" 594 # str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\\n'.format(average_gram_matrix_time, std_gram_matrix_time)\n", | |||
" 595 # str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\\n'.format(ave_bgmt, std_bgmt)\n", | |||
" 596 str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\\n\\n'.format(tt_poster)\n", | |||
" 597 \n", | |||
" 598 # print out as table.\n", | |||
" 599 from collections import OrderedDict\n", | |||
" 600 from tabulate import tabulate\n", | |||
" 601 table_dict = {}\n", | |||
" 602 if model_type == 'regression':\n", | |||
" 603 for param_in in param_list:\n", | |||
" 604 param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])\n", | |||
" 605 else:\n", | |||
" 606 for param_in in param_list:\n", | |||
" 607 param_in['C'] = '{:.2e}'.format(param_in['C'])\n", | |||
" 608 table_dict['params'] = [{**param_out, **param_in}\n", | |||
" 609 for param_in in param_list for param_out in param_list_pre_revised]\n", | |||
" 610 # table_dict['gram_matrix_time'] = [\n", | |||
" 611 # '{:.2f}'.format(gram_matrix_time[index_out])\n", | |||
" 612 # for param_in in param_list\n", | |||
" 613 # for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 614 # ]\n", | |||
" 615 table_dict['valid_perf'] = [\n", | |||
" 616 '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],\n", | |||
" 617 std_val_scores[index_out][index_in])\n", | |||
" 618 for index_in, _ in enumerate(param_list)\n", | |||
" 619 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 620 ]\n", | |||
" 621 table_dict['test_perf'] = [\n", | |||
" 622 '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],\n", | |||
" 623 std_perf_scores[index_out][index_in])\n", | |||
" 624 for index_in, _ in enumerate(param_list)\n", | |||
" 625 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 626 ]\n", | |||
" 627 table_dict['train_perf'] = [\n", | |||
" 628 '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],\n", | |||
" 629 std_train_scores[index_out][index_in])\n", | |||
" 630 for index_in, _ in enumerate(param_list)\n", | |||
" 631 for index_out, _ in enumerate(param_list_pre_revised)\n", | |||
" 632 ]\n", | |||
" 633 keyorder = [\n", | |||
" 634 'params', 'train_perf', 'valid_perf', 'test_perf'\n", | |||
" 635 ]\n", | |||
" 636 print()\n", | |||
" 637 tb_print = tabulate(\n", | |||
" 638 OrderedDict(\n", | |||
" 639 sorted(table_dict.items(),\n", | |||
" 640 key=lambda i: keyorder.index(i[0]))),\n", | |||
" 641 headers='keys')\n", | |||
" 642 # print(tb_print)\n", | |||
" 643 str_fw += 'table of performance v.s. hyper-params:\\n\\n%s\\n\\n' % tb_print\n", | |||
" 644 \n", | |||
" 645 # open file to save all results for this dataset.\n", | |||
" 646 if not os.path.exists(results_dir):\n", | |||
" 647 os.makedirs(results_dir)\n", | |||
" 648 \n", | |||
" 649 # open file to save all results for this dataset.\n", | |||
" 650 145.6 MiB 0.0 MiB if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):\n", | |||
" 651 with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:\n", | |||
" 652 f.write(str_fw)\n", | |||
" 653 else:\n", | |||
" 654 145.6 MiB 0.0 MiB with open(results_dir + '/' + ds_name + '.output.txt', 'r+') as f:\n", | |||
" 655 145.6 MiB 0.0 MiB content = f.read()\n", | |||
" 656 145.6 MiB 0.0 MiB f.seek(0, 0)\n", | |||
" 657 145.6 MiB 0.0 MiB f.write(str_fw + '\\n\\n\\n' + content)\n", | |||
"\n", | |||
"\n", | |||
"\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"import functools\n", | |||
"import sys\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"sys.path.insert(0, \"../../\")\n", | |||
"from libs import *\n", | |||
"import multiprocessing\n", | |||
"\n", | |||
"from pygraph.kernels.spKernel import spkernel\n", | |||
"from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct\n", | |||
"#from pygraph.utils.model_selection_precomputed import trial_do\n", | |||
"\n", | |||
"dslist = [\n", | |||
" {'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds',\n", | |||
" 'task': 'regression'}, # node symb\n", | |||
"# {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', 'task': 'regression',\n", | |||
"# 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt', }, \n", | |||
"# # contains single node graph, node symb\n", | |||
"# {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds', }, # node/edge symb\n", | |||
"# {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled\n", | |||
"# {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb\n", | |||
"# {'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
"# # node nsymb\n", | |||
"# {'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
"# # node symb/nsymb\n", | |||
"# {'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
"# # node/edge symb\n", | |||
"# {'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat',\n", | |||
"# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb\n", | |||
"\n", | |||
" # {'name': 'COIL-DEL', 'dataset': '../../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb\n", | |||
" # # # {'name': 'BZR', 'dataset': '../../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # # {'name': 'COX2', 'dataset': '../../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # {'name': 'Fingerprint', 'dataset': '../../datasets/Fingerprint/Fingerprint_A.txt'},\n", | |||
" #\n", | |||
" # # {'name': 'DHFR', 'dataset': '../../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'SYNTHETIC', 'dataset': '../../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'MSRC9', 'dataset': '../../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb\n", | |||
" # # {'name': 'MSRC21', 'dataset': '../../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb\n", | |||
" # # {'name': 'FIRSTMM_DB', 'dataset': '../../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb\n", | |||
"\n", | |||
" # # {'name': 'PROTEINS', 'dataset': '../../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'PROTEINS_full', 'dataset': '../../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb\n", | |||
" # # {'name': 'AIDS', 'dataset': '../../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb\n", | |||
" # {'name': 'NCI1', 'dataset': '../../datasets/NCI1/NCI1.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI109', 'dataset': '../../datasets/NCI109/NCI109.mat',\n", | |||
" # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb\n", | |||
" # {'name': 'NCI-HIV', 'dataset': '../../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" # 'dataset_y': '../../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb\n", | |||
"\n", | |||
" # # not working below\n", | |||
" # {'name': 'PTC_FM', 'dataset': '../../datasets/PTC/Train/FM.ds',},\n", | |||
" # {'name': 'PTC_FR', 'dataset': '../../datasets/PTC/Train/FR.ds',},\n", | |||
" # {'name': 'PTC_MM', 'dataset': '../../datasets/PTC/Train/MM.ds',},\n", | |||
" # {'name': 'PTC_MR', 'dataset': '../../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"estimator = spkernel\n", | |||
"mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)\n", | |||
"param_grid_precomputed = {'node_kernels': [\n", | |||
" {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}\n", | |||
"param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},\n", | |||
" {'alpha': np.logspace(-10, 10, num=41, base=10)}]\n", | |||
"\n", | |||
"for ds in dslist:\n", | |||
" print()\n", | |||
" print(ds['name'])\n", | |||
" model_selection_for_precomputed_kernel(\n", | |||
" ds['dataset'],\n", | |||
" estimator,\n", | |||
" param_grid_precomputed,\n", | |||
" (param_grid[1] if ('task' in ds and ds['task']\n", | |||
" == 'regression') else param_grid[0]),\n", | |||
" (ds['task'] if 'task' in ds else 'classification'),\n", | |||
" NUM_TRIALS=30,\n", | |||
" datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),\n", | |||
" extra_params=(ds['extra_params'] if 'extra_params' in ds else None),\n", | |||
" ds_name=ds['name'],\n", | |||
" n_jobs=multiprocessing.cpu_count(),\n", | |||
" read_gm_from_file=False)\n", | |||
" print()" | |||
] | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.7" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 2 | |||
} |
@@ -16,12 +16,13 @@ from tqdm import tqdm | |||
import networkx as nx | |||
import numpy as np | |||
import functools | |||
from libs import * | |||
#import multiprocessing | |||
from matplotlib import pyplot as plt | |||
from sklearn.model_selection import ParameterGrid | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from libs import * | |||
from pygraph.utils.utils import getSPGraph, direct_product | |||
from pygraph.utils.graphdataset import get_dataset_attributes | |||
from pygraph.utils.graphfiles import loadDataset | |||
@@ -605,20 +606,20 @@ def compute_gram_matrices(datafile, | |||
dslist = [ | |||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
{'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds', | |||
'task': 'regression'}, # node symb | |||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||
{'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
{'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'}, | |||
# node symb/nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node/edge symb | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | |||
# {'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# {'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat', | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | |||
] | |||
@@ -677,12 +678,12 @@ for idx1, ds in enumerate(dslist): | |||
print() | |||
print(gmtmat[idx1, :]) | |||
np.save('test_parallel/' + estimator.__name__ + '.' + ds['name'] + '_' + | |||
np.save('../test_parallel/' + estimator.__name__ + '.' + ds['name'] + '_' + | |||
str(idx1), gmtmat[idx1, :]) | |||
p = ax.plot(chunklist, gmtmat[idx1, :], '.-', label=ds['name'], zorder=3) | |||
ax.legend(loc='upper right', ncol=3, labelspacing=0.1, handletextpad=0.4, | |||
columnspacing=0.6) | |||
plt.savefig('test_parallel/' + estimator.__name__ + str(idx1) + '_' + | |||
plt.savefig('../test_parallel/' + estimator.__name__ + str(idx1) + '_' + | |||
str(cpus) + '.eps', format='eps', dpi=300) | |||
# plt.show() |
@@ -1,7 +1,7 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Test gram matrices. | |||
Check basic properties of gram matrices. | |||
Created on Wed Sep 19 15:32:29 2018 | |||
@author: ljia | |||
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt | |||
from numpy.linalg import eig | |||
# read gram matrices from file. | |||
results_dir = 'results/marginalizedkernel/myria' | |||
results_dir = '../results/marginalizedkernel/myria' | |||
ds_name = 'ENZYMES' | |||
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | |||
#print('gm time: ', gmfile['gmtime']) | |||
@@ -27,7 +27,7 @@ for idx, x in enumerate(gram_matrices): | |||
print(idx) | |||
plt.imshow(x) | |||
plt.colorbar() | |||
plt.savefig('check_gm/' + ds_name + '.gm.eps', format='eps', dpi=300) | |||
plt.savefig('../check_gm/' + ds_name + '.gm.eps', format='eps', dpi=300) | |||
# print(np.transpose(x)) | |||
print('if symmetric: ', np.array_equal(x, np.transpose(x))) | |||
@@ -45,4 +45,4 @@ for idx, x in enumerate(gram_matrices): | |||
[lamnda, v] = eig(x) | |||
print('min, max lambda: ', min(lamnda), max(lamnda)) | |||
if -1e-10 > min(lamnda): | |||
raise Exception('wrong eigen values.') | |||
raise Exception('wrong eigen values.') |
@@ -16,7 +16,7 @@ Author : Sandro Vega-Pons, Emanuele Olivetti | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
import numpy as np | |||
import networkx as nx | |||
from pygraph.utils.graphfiles import loadDataset | |||
@@ -113,15 +113,15 @@ class GK_SP: | |||
ds_name = 'PAH' | |||
datafile = '../datasets/PAH/dataset.ds' | |||
datafile = '../../datasets/PAH/dataset.ds' | |||
dataset, y = loadDataset(datafile, filename_y=None, extra_params=None) | |||
gk_sp = GK_SP() | |||
x = gk_sp.compare_list(dataset) | |||
np.savez('check_gm/' + ds_name + '.gm.jstsp', gms=x) | |||
np.savez('../check_gm/' + ds_name + '.gm.jstsp', gms=x) | |||
plt.imshow(x) | |||
plt.colorbar() | |||
plt.savefig('check_gm/' + ds_name + '.gm.jstsp.eps', format='eps', dpi=300) | |||
plt.savefig('../check_gm/' + ds_name + '.gm.jstsp.eps', format='eps', dpi=300) | |||
# print(np.transpose(x)) | |||
print('if symmetric: ', np.array_equal(x, np.transpose(x))) | |||
@@ -33,7 +33,7 @@ def idx2chunksize2(idx): | |||
else: | |||
return (idx - 15) * 20000 * 10000 | |||
idx, mrlt, rlt = loadmin('test_parallel/myria/ENZYMES.npy') | |||
idx, mrlt, rlt = loadmin('../test_parallel/myria/ENZYMES.npy') | |||
csize = idx2chunksize2(idx) | |||
#dsize = np.array([183, 150, 68, 94, 188, 2250, 600]) |
@@ -60,5 +60,5 @@ plt.xticks(ind + width / 2, ('Acyclic', 'Alkane', 'MAO', 'PAH', 'MUTAG', 'Letter | |||
ax.set_ylim(bottom=1e-15) | |||
ax.legend((p1[0], p2[0], p3[0], p4[0]), ('min1', 'max1', 'min2', 'max2'), loc='upper right') | |||
plt.savefig('check_gm/compare_eigen_values.eps', format='eps', dpi=300) | |||
plt.savefig('../check_gm/compare_eigen_values.eps', format='eps', dpi=300) | |||
plt.show() |
@@ -81,6 +81,6 @@ ax.yaxis.set_ticks_position('none') | |||
fig.subplots_adjust(right=0.63) | |||
fig.legend(loc='right', ncol=1, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6) | |||
plt.savefig('check_gm/parallel_runtime_on_different_machines.eps', format='eps', dpi=300, | |||
plt.savefig('../check_gm/parallel_runtime_on_different_machines.eps', format='eps', dpi=300, | |||
transparent=True, bbox_inches='tight') | |||
plt.show() |
@@ -39,7 +39,7 @@ | |||
"\n", | |||
"Alkane:\n", | |||
"substructures : {'non linear', 'linear'}\n", | |||
"node_labeled : True\n", | |||
"node_labeled : False\n", | |||
"edge_labeled : False\n", | |||
"is_directed : False\n", | |||
"dataset_size : 150\n", | |||
@@ -55,7 +55,7 @@ | |||
"ave_fill_factor : 0.10199498404299989\n", | |||
"min_fill_factor : 0.0\n", | |||
"max_fill_factor : 0.25\n", | |||
"node_label_num : 2\n", | |||
"node_label_num : 1\n", | |||
"edge_label_num : 1\n", | |||
"node_attr_dim : 0\n", | |||
"edge_attr_dim : 0\n", | |||
@@ -542,8 +542,8 @@ | |||
"edge_attr_dim : 0\n", | |||
"class_number : 2\n", | |||
"\n", | |||
"load SDF: 100%|██████████| 4457424/4457424 [00:08<00:00, 522501.84it/s]\n", | |||
"ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4625.31it/s] \n", | |||
"load SDF: 100%|██████████| 4457424/4457424 [00:08<00:00, 497346.72it/s]\n", | |||
"ajust data: 100%|██████████| 42687/42687 [00:09<00:00, 4689.76it/s] \n", | |||
"\n", | |||
"NCI-HIV:\n", | |||
"substructures : {'non linear', 'linear'}\n", | |||
@@ -574,45 +574,45 @@ | |||
], | |||
"source": [ | |||
"import sys\n", | |||
"sys.path.insert(0, \"../\")\n", | |||
"sys.path.insert(0, \"../../\")\n", | |||
"from pygraph.utils.graphfiles import loadDataset\n", | |||
"from pygraph.utils.graphdataset import get_dataset_attributes\n", | |||
"\n", | |||
"dslist = [\n", | |||
" {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',},\n", | |||
" {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds',\n", | |||
" 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',},\n", | |||
" {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',},\n", | |||
" {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',},\n", | |||
" {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',\n", | |||
" {'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds',},\n", | |||
" {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds',\n", | |||
" 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt',},\n", | |||
" {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds',},\n", | |||
" {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds',},\n", | |||
" {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},\n", | |||
" {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
" {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
" {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',\n", | |||
" {'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'},\n", | |||
" {'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},\n", | |||
" {'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'},\n", | |||
" {'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},\n", | |||
" {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},\n", | |||
" {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n", | |||
" {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n", | |||
" {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},\n", | |||
" {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},\n", | |||
" {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},\n", | |||
" {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},\n", | |||
" {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, \n", | |||
" {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n", | |||
" {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, \n", | |||
" {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',\n", | |||
" {'name': 'AIDS', 'dataset': '../../datasets/AIDS/AIDS_A.txt'},\n", | |||
" {'name': 'FIRSTMM_DB', 'dataset': '../../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},\n", | |||
" {'name': 'MSRC9', 'dataset': '../../datasets/MSRC_9_txt/MSRC_9_A.txt'},\n", | |||
" {'name': 'MSRC21', 'dataset': '../../datasets/MSRC_21_txt/MSRC_21_A.txt'},\n", | |||
" {'name': 'SYNTHETIC', 'dataset': '../../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},\n", | |||
" {'name': 'BZR', 'dataset': '../../datasets/BZR_txt/BZR_A_sparse.txt'},\n", | |||
" {'name': 'COX2', 'dataset': '../../datasets/COX2_txt/COX2_A_sparse.txt'},\n", | |||
" {'name': 'DHFR', 'dataset': '../../datasets/DHFR_txt/DHFR_A_sparse.txt'}, \n", | |||
" {'name': 'PROTEINS', 'dataset': '../../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},\n", | |||
" {'name': 'PROTEINS_full', 'dataset': '../../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, \n", | |||
" {'name': 'NCI1', 'dataset': '../../datasets/NCI1/NCI1.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n", | |||
" {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',\n", | |||
" {'name': 'NCI109', 'dataset': '../../datasets/NCI109/NCI109.mat',\n", | |||
" 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},\n", | |||
" {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},\n", | |||
" {'name': 'NCI-HIV', 'dataset': '../../datasets/NCI-HIV/AIDO99SD.sdf',\n", | |||
" 'dataset_y': '../../datasets/NCI-HIV/aids_conc_may04.txt',},\n", | |||
"\n", | |||
"# # not working below\n", | |||
"# {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},\n", | |||
"# {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},\n", | |||
"# {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},\n", | |||
"# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},\n", | |||
"# {'name': 'PTC_FM', 'dataset': '../../datasets/PTC/Train/FM.ds',},\n", | |||
"# {'name': 'PTC_FR', 'dataset': '../../datasets/PTC/Train/FR.ds',},\n", | |||
"# {'name': 'PTC_MM', 'dataset': '../../datasets/PTC/Train/MM.ds',},\n", | |||
"# {'name': 'PTC_MR', 'dataset': '../../datasets/PTC/Train/MR.ds',},\n", | |||
"]\n", | |||
"\n", | |||
"for ds in dslist:\n", |
@@ -0,0 +1,62 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Oct 17 16:07:38 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.graphdataset import get_dataset_attributes | |||
dslist = [ | |||
{'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds',}, | |||
{'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', | |||
'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt',}, | |||
{'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds',}, | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds',}, | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, | |||
{'name': 'Letter-med', 'dataset': '../../datasets/Letter-med/Letter-med_A.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'Mutagenicity', 'dataset': '../../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
{'name': 'D&D', 'dataset': '../../datasets/D&D/DD.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, | |||
{'name': 'AIDS', 'dataset': '../../datasets/AIDS/AIDS_A.txt'}, | |||
{'name': 'FIRSTMM_DB', 'dataset': '../../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, | |||
{'name': 'MSRC9', 'dataset': '../../datasets/MSRC_9_txt/MSRC_9_A.txt'}, | |||
{'name': 'MSRC21', 'dataset': '../../datasets/MSRC_21_txt/MSRC_21_A.txt'}, | |||
{'name': 'SYNTHETIC', 'dataset': '../../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, | |||
{'name': 'BZR', 'dataset': '../../datasets/BZR_txt/BZR_A_sparse.txt'}, | |||
{'name': 'COX2', 'dataset': '../../datasets/COX2_txt/COX2_A_sparse.txt'}, | |||
{'name': 'DHFR', 'dataset': '../../datasets/DHFR_txt/DHFR_A_sparse.txt'}, | |||
{'name': 'PROTEINS', 'dataset': '../../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, | |||
{'name': 'PROTEINS_full', 'dataset': '../../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, | |||
{'name': 'NCI1', 'dataset': '../../datasets/NCI1/NCI1.mat', | |||
'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, | |||
{'name': 'NCI109', 'dataset': '../../datasets/NCI109/NCI109.mat', | |||
'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, | |||
{'name': 'NCI-HIV', 'dataset': '../../datasets/NCI-HIV/AIDO99SD.sdf', | |||
'dataset_y': '../../datasets/NCI-HIV/aids_conc_may04.txt',}, | |||
# # not working below | |||
# {'name': 'PTC_FM', 'dataset': '../../datasets/PTC/Train/FM.ds',}, | |||
# {'name': 'PTC_FR', 'dataset': '../../datasets/PTC/Train/FR.ds',}, | |||
# {'name': 'PTC_MM', 'dataset': '../../datasets/PTC/Train/MM.ds',}, | |||
# {'name': 'PTC_MR', 'dataset': '../../datasets/PTC/Train/MR.ds',}, | |||
] | |||
for ds in dslist: | |||
dataset, y = loadDataset( | |||
ds['dataset'], | |||
filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None), | |||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None)) | |||
attrs = get_dataset_attributes( | |||
dataset, target=y, node_label='atom', edge_label='bond_type') | |||
print() | |||
print(ds['name'] + ':') | |||
for atr in attrs: | |||
print(atr, ':', attrs[atr]) | |||
print() |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -19,7 +19,7 @@ from libs import * | |||
import multiprocessing | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -35,7 +35,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -19,7 +19,7 @@ from libs import * | |||
import multiprocessing | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -36,7 +36,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -21,7 +21,7 @@ import functools | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -59,7 +59,7 @@ def run_ms(dataset, y, ds): | |||
'sub_kernel': ['geo', 'exp']} | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -21,7 +21,7 @@ import multiprocessing | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -36,7 +36,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -21,7 +21,7 @@ import multiprocessing | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -38,7 +38,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -19,7 +19,7 @@ from libs import * | |||
import multiprocessing | |||
dslist = [ | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -34,7 +34,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -9,7 +9,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -24,12 +24,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -45,7 +45,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -25,12 +25,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -47,7 +47,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -27,12 +27,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -70,7 +70,7 @@ def run_ms(dataset, y, ds): | |||
'sub_kernel': ['geo', 'exp']} | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -27,12 +27,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -47,7 +47,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -32,7 +32,7 @@ dslist = [ | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -49,7 +49,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -10,7 +10,7 @@ import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
sys.path.insert(0, "../../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.model_selection_precomputed import compute_gram_matrices | |||
from sklearn.model_selection import ParameterGrid | |||
@@ -25,12 +25,12 @@ dslist = [ | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||
# # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
{'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
{'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
@@ -45,7 +45,7 @@ def run_ms(dataset, y, ds): | |||
_, gram_matrix_time, _, _, _ = compute_gram_matrices( | |||
dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)), | |||
'../notebooks/results/' + estimator.__name__, ds['name'], | |||
'../../notebooks/results/' + estimator.__name__, ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
average_gram_matrix_time = np.mean(gram_matrix_time) | |||
std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) |
@@ -0,0 +1,28 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Mar 20 10:12:15 2019 | |||
inferring a graph grom path frequency. | |||
@author: ljia | |||
""" | |||
def SISF(K, v): | |||
if output: | |||
return output | |||
else: | |||
return 'no solution' | |||
def SISF_M(K, v): | |||
return output | |||
def GIPF_tree(K, v): | |||
if K == 1: | |||
pass | |||
if G: | |||
return G | |||
else: | |||
return 'no solution' | |||
def GIPF_M(K, v): | |||
return G |
@@ -139,9 +139,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||
y = [y[idxt] for idxt in idx_trim] # trim y accordingly | |||
# Kmatrix = np.random.rand(2250, 2250) | |||
# current_run_time = 0.1 | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
# remove graphs whose kernels with themselves are zeros | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
nb_g_ignore = 0 | |||
for idxk, diag in enumerate(Kmatrix_diag): | |||
if diag == 0: | |||
@@ -149,6 +149,7 @@ def model_selection_for_precomputed_kernel(datafile, | |||
Kmatrix = np.delete(Kmatrix, (idxk - nb_g_ignore), axis=1) | |||
nb_g_ignore += 1 | |||
# normalization | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
for i in range(len(Kmatrix)): | |||
for j in range(i, len(Kmatrix)): | |||
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||