|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075 |
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The line_profiler extension is already loaded. To reload it, use:\n",
- " %reload_ext line_profiler\n",
- "\n",
- " --- This is a regression problem ---\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.1 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 258.76952958106995 seconds ---\n",
- "[[ 0.0287062 0.0124634 0.00444444 ..., 0.00606061 0.00606061\n",
- " 0.00606061]\n",
- " [ 0.0124634 0.01108958 0.00333333 ..., 0.00454545 0.00454545\n",
- " 0.00454545]\n",
- " [ 0.00444444 0.00333333 0.0287062 ..., 0.00819912 0.00819912\n",
- " 0.00975875]\n",
- " ..., \n",
- " [ 0.00606061 0.00454545 0.00819912 ..., 0.02846735 0.02836907\n",
- " 0.02896354]\n",
- " [ 0.00606061 0.00454545 0.00819912 ..., 0.02836907 0.02831424\n",
- " 0.0288712 ]\n",
- " [ 0.00606061 0.00454545 0.00975875 ..., 0.02896354 0.0288712\n",
- " 0.02987915]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 12.186285\n",
- "With standard deviation: 7.038988\n",
- "\n",
- " Mean performance on test set: 18.024312\n",
- "With standard deviation: 6.292466\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.2 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 256.3271746635437 seconds ---\n",
- "[[ 0.06171557 0.03856471 0.01777778 ..., 0.02424242 0.02424242\n",
- " 0.02424242]\n",
- " [ 0.03856471 0.03579176 0.01333333 ..., 0.01818182 0.01818182\n",
- " 0.01818182]\n",
- " [ 0.01777778 0.01333333 0.06171557 ..., 0.02994207 0.02994207\n",
- " 0.03262072]\n",
- " ..., \n",
- " [ 0.02424242 0.01818182 0.02994207 ..., 0.07442109 0.07434207\n",
- " 0.07383563]\n",
- " [ 0.02424242 0.01818182 0.02994207 ..., 0.07434207 0.07430377\n",
- " 0.07376068]\n",
- " [ 0.02424242 0.01818182 0.03262072 ..., 0.07383563 0.07376068\n",
- " 0.07366354]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 13.955359\n",
- "With standard deviation: 7.544068\n",
- "\n",
- " Mean performance on test set: 18.337589\n",
- "With standard deviation: 5.854545\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.3 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 255.61398577690125 seconds ---\n",
- "[[ 0.09803909 0.07202114 0.04 ..., 0.05454545 0.05454545\n",
- " 0.05454545]\n",
- " [ 0.07202114 0.06853421 0.03 ..., 0.04090909 0.04090909\n",
- " 0.04090909]\n",
- " [ 0.04 0.03 0.09803909 ..., 0.06368916 0.06368916\n",
- " 0.06678704]\n",
- " ..., \n",
- " [ 0.05454545 0.04090909 0.06368916 ..., 0.12892852 0.12891455\n",
- " 0.12734365]\n",
- " [ 0.05454545 0.04090909 0.06368916 ..., 0.12891455 0.12892664\n",
- " 0.12733207]\n",
- " [ 0.05454545 0.04090909 0.06678704 ..., 0.12734365 0.12733207\n",
- " 0.1261675 ]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 13.939071\n",
- "With standard deviation: 7.958123\n",
- "\n",
- " Mean performance on test set: 18.495992\n",
- "With standard deviation: 5.734918\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.4 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 254.89703965187073 seconds ---\n",
- "[[ 0.13888889 0.11120616 0.07111111 ..., 0.0969697 0.0969697\n",
- " 0.0969697 ]\n",
- " [ 0.11120616 0.10756609 0.05333333 ..., 0.07272727 0.07272727\n",
- " 0.07272727]\n",
- " [ 0.07111111 0.05333333 0.13888889 ..., 0.10909713 0.10909713\n",
- " 0.11216176]\n",
- " ..., \n",
- " [ 0.0969697 0.07272727 0.10909713 ..., 0.19178929 0.19182091\n",
- " 0.18963212]\n",
- " [ 0.0969697 0.07272727 0.10909713 ..., 0.19182091 0.19186661\n",
- " 0.18966477]\n",
- " [ 0.0969697 0.07272727 0.11216176 ..., 0.18963212 0.18966477\n",
- " 0.18786824]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 16.259313\n",
- "With standard deviation: 6.693580\n",
- "\n",
- " Mean performance on test set: 19.449149\n",
- "With standard deviation: 5.371295\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.5 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 256.75693798065186 seconds ---\n",
- "[[ 0.18518519 0.15591398 0.11111111 ..., 0.15151515 0.15151515\n",
- " 0.15151515]\n",
- " [ 0.15591398 0.15254237 0.08333333 ..., 0.11363636 0.11363636\n",
- " 0.11363636]\n",
- " [ 0.11111111 0.08333333 0.18518519 ..., 0.16617791 0.16617791\n",
- " 0.16890214]\n",
- " ..., \n",
- " [ 0.15151515 0.11363636 0.16617791 ..., 0.26386999 0.26391515\n",
- " 0.26158184]\n",
- " [ 0.15151515 0.11363636 0.16617791 ..., 0.26391515 0.26396688\n",
- " 0.26162729]\n",
- " [ 0.15151515 0.11363636 0.16890214 ..., 0.26158184 0.26162729\n",
- " 0.25964592]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 17.018055\n",
- "With standard deviation: 6.844372\n",
- "\n",
- " Mean performance on test set: 19.785683\n",
- "With standard deviation: 5.550543\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.6 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 256.5566437244415 seconds ---\n",
- "[[ 0.23809524 0.20664506 0.16 ..., 0.21818182 0.21818182\n",
- " 0.21818182]\n",
- " [ 0.20664506 0.20385906 0.12 ..., 0.16363636 0.16363636\n",
- " 0.16363636]\n",
- " [ 0.16 0.12 0.23809524 ..., 0.2351024 0.2351024\n",
- " 0.23727718]\n",
- " ..., \n",
- " [ 0.21818182 0.16363636 0.2351024 ..., 0.34658956 0.34662512\n",
- " 0.34454945]\n",
- " [ 0.21818182 0.16363636 0.2351024 ..., 0.34662512 0.34666325\n",
- " 0.34458505]\n",
- " [ 0.21818182 0.16363636 0.23727718 ..., 0.34454945 0.34458505\n",
- " 0.34279503]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 17.661762\n",
- "With standard deviation: 6.567179\n",
- "\n",
- " Mean performance on test set: 20.192158\n",
- "With standard deviation: 5.591223\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.7 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 254.9531705379486 seconds ---\n",
- "[[ 0.2991453 0.26444601 0.21777778 ..., 0.2969697 0.2969697\n",
- " 0.2969697 ]\n",
- " [ 0.26444601 0.26246188 0.16333333 ..., 0.22272727 0.22272727\n",
- " 0.22272727]\n",
- " [ 0.21777778 0.16333333 0.2991453 ..., 0.31614548 0.31614548\n",
- " 0.31765009]\n",
- " ..., \n",
- " [ 0.2969697 0.22272727 0.31614548 ..., 0.44189997 0.44191814\n",
- " 0.44038348]\n",
- " [ 0.2969697 0.22272727 0.31614548 ..., 0.44191814 0.44193708\n",
- " 0.44040164]\n",
- " [ 0.2969697 0.22272727 0.31765009 ..., 0.44038348 0.44040164\n",
- " 0.43906772]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 20.588213\n",
- "With standard deviation: 5.746009\n",
- "\n",
- " Mean performance on test set: 21.661372\n",
- "With standard deviation: 6.026849\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.8 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 252.80415797233582 seconds ---\n",
- "[[ 0.37037037 0.33093141 0.28444444 ..., 0.38787879 0.38787879\n",
- " 0.38787879]\n",
- " [ 0.33093141 0.32983023 0.21333333 ..., 0.29090909 0.29090909\n",
- " 0.29090909]\n",
- " [ 0.28444444 0.21333333 0.37037037 ..., 0.4096795 0.4096795\n",
- " 0.41049599]\n",
- " ..., \n",
- " [ 0.38787879 0.29090909 0.4096795 ..., 0.55242487 0.55243009\n",
- " 0.5515636 ]\n",
- " [ 0.38787879 0.29090909 0.4096795 ..., 0.55243009 0.55243545\n",
- " 0.55156881]\n",
- " [ 0.38787879 0.29090909 0.41049599 ..., 0.5515636 0.55156881\n",
- " 0.55081257]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 23.594332\n",
- "With standard deviation: 3.806374\n",
- "\n",
- " Mean performance on test set: 22.996018\n",
- "With standard deviation: 6.083466\n",
- "\n",
- "\n",
- " #--- calculating kernel matrix when p_quit = 0.9 ---#\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 256.7384788990021 seconds ---\n",
- "[[ 0.45454545 0.40839542 0.36 ..., 0.49090909 0.49090909\n",
- " 0.49090909]\n",
- " [ 0.40839542 0.40805534 0.27 ..., 0.36818182 0.36818182\n",
- " 0.36818182]\n",
- " [ 0.36 0.27 0.45454545 ..., 0.51619708 0.51619708\n",
- " 0.51644564]\n",
- " ..., \n",
- " [ 0.49090909 0.36818182 0.51619708 ..., 0.68172189 0.68172233\n",
- " 0.68145294]\n",
- " [ 0.49090909 0.36818182 0.51619708 ..., 0.68172233 0.68172277\n",
- " 0.68145338]\n",
- " [ 0.49090909 0.36818182 0.51644564 ..., 0.68145294 0.68145338\n",
- " 0.68121781]]\n",
- "\n",
- " Saving kernel matrix to file...\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- " Mean performance on train set: 25.808155\n",
- "With standard deviation: 3.312074\n",
- "\n",
- " Mean performance on test set: 24.424089\n",
- "With standard deviation: 4.951191\n",
- "\n",
- "\n",
- " p_quit RMSE_test std_test RMSE_train std_train k_time\n",
- "-------- ----------- ---------- ------------ ----------- --------\n",
- " 0.1 18.0243 6.29247 12.1863 7.03899 258.77\n",
- " 0.2 18.3376 5.85454 13.9554 7.54407 256.327\n",
- " 0.3 18.496 5.73492 13.9391 7.95812 255.614\n",
- " 0.4 19.4491 5.3713 16.2593 6.69358 254.897\n",
- " 0.5 19.7857 5.55054 17.0181 6.84437 256.757\n",
- " 0.6 20.1922 5.59122 17.6618 6.56718 256.557\n",
- " 0.7 21.6614 6.02685 20.5882 5.74601 254.953\n",
- " 0.8 22.996 6.08347 23.5943 3.80637 252.804\n",
- " 0.9 24.4241 4.95119 25.8082 3.31207 256.738\n"
- ]
- }
- ],
- "source": [
- "%load_ext line_profiler\n",
- "\n",
- "import numpy as np\n",
- "import sys\n",
- "sys.path.insert(0, \"../\")\n",
- "from pygraph.utils.utils import kernel_train_test\n",
- "from pygraph.kernels.marginalizedKernel import marginalizedkernel, _marginalizedkernel_do\n",
- "\n",
- "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
- "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n",
- "\n",
- "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', itr = 20)\n",
- "\n",
- "kernel_train_test(datafile, kernel_file_path, marginalizedkernel, kernel_para, \\\n",
- " hyper_name = 'p_quit', hyper_range = np.linspace(0.1, 0.9, 9), normalize = False)\n",
- "\n",
- "# %lprun -f _marginalizedkernel_do \\\n",
- "# kernel_train_test(datafile, kernel_file_path, marginalizedkernel, kernel_para, \\\n",
- "# hyper_name = 'p_quit', hyper_range = np.linspace(0.1, 0.9, 9), normalize = False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# results\n",
- "\n",
- "# with y normalization\n",
- " p_quit RMSE_test std_test RMSE_train std_train k_time\n",
- "-------- ----------- ---------- ------------ ----------- --------\n",
- " 0.1 18.0192 6.27867 12.1642 6.99821 266.905\n",
- " 0.2 18.3374 5.84775 13.9376 7.51398 256.288\n",
- " 0.3 18.4955 5.73774 13.9291 7.9416 254.441\n",
- " 0.4 19.4498 5.37509 16.2538 6.68378 257.581\n",
- " 0.5 19.7851 5.55018 17.0142 6.83653 248.562\n",
- " 0.6 20.1911 5.58951 17.6595 6.56211 249.667\n",
- " 0.7 21.6606 6.02589 20.5872 5.74395 243.046\n",
- " 0.8 22.9959 6.08344 23.5941 3.80595 252.36\n",
- " 0.9 24.424 4.9512 25.8082 3.31202 248.077\n",
- "\n",
- "# without y normalization\n",
- " p_quit RMSE_test std_test RMSE_train std_train k_time\n",
- "-------- ----------- ---------- ------------ ----------- --------\n",
- " 0.1 18.0243 6.29247 12.1863 7.03899 258.77\n",
- " 0.2 18.3376 5.85454 13.9554 7.54407 256.327\n",
- " 0.3 18.496 5.73492 13.9391 7.95812 255.614\n",
- " 0.4 19.4491 5.3713 16.2593 6.69358 254.897\n",
- " 0.5 19.7857 5.55054 17.0181 6.84437 256.757\n",
- " 0.6 20.1922 5.59122 17.6618 6.56718 256.557\n",
- " 0.7 21.6614 6.02685 20.5882 5.74601 254.953\n",
- " 0.8 22.996 6.08347 23.5943 3.80637 252.804\n",
- " 0.9 24.4241 4.95119 25.8082 3.31207 256.738"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- " --- This is a regression problem ---\n",
- "\n",
- "\n",
- " Loading dataset from file...\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 1133.0229969024658 seconds ---\n",
- "[[ 0.0287062 0.0124634 0.00444444 ..., 0.00606061 0.00606061\n",
- " 0.00606061]\n",
- " [ 0.0124634 0.01108958 0.00333333 ..., 0.00454545 0.00454545\n",
- " 0.00454545]\n",
- " [ 0.00444444 0.00333333 0.0287062 ..., 0.00819912 0.00819912\n",
- " 0.00975875]\n",
- " ..., \n",
- " [ 0.00606061 0.00454545 0.00819912 ..., 0.02846735 0.02836907\n",
- " 0.02896354]\n",
- " [ 0.00606061 0.00454545 0.00819912 ..., 0.02836907 0.02831424\n",
- " 0.0288712 ]\n",
- " [ 0.00606061 0.00454545 0.00975875 ..., 0.02896354 0.0288712\n",
- " 0.02987915]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on train set: 12.186285\n",
- "With standard deviation: 7.038988\n",
- "\n",
- " Mean performance on test set: 18.024312\n",
- "With standard deviation: 6.292466\n",
- "\n",
- "\n",
- " rmse_test std_test rmse_train std_train k_time\n",
- "----------- ---------- ------------ ----------- --------\n",
- " 18.0243 6.29247 12.1863 7.03899 1133.02\n"
- ]
- }
- ],
- "source": [
- "%load_ext line_profiler\n",
- "\n",
- "import numpy as np\n",
- "import sys\n",
- "sys.path.insert(0, \"../\")\n",
- "from pygraph.utils.utils import kernel_train_test\n",
- "from pygraph.kernels.marginalizedKernel import marginalizedkernel, _marginalizedkernel_do\n",
- "\n",
- "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
- "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n",
- "\n",
- "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', itr = 20, p_quit = 0.1)\n",
- "\n",
- "# kernel_train_test(datafile, kernel_file_path, marginalizedkernel, kernel_para, \\\n",
- "# hyper_name = 'p_quit', hyper_range = np.linspace(0.1, 0.9, 9), normalize = False)\n",
- "\n",
- "%lprun -f _marginalizedkernel_do \\\n",
- " kernel_train_test(datafile, kernel_file_path, marginalizedkernel, kernel_para, \\\n",
- " normalize = False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "Timer unit: 1e-06 s\n",
- "\n",
- "Total time: 828.879 s\n",
- "File: ../pygraph/kernels/marginalizedKernel.py\n",
- "Function: _marginalizedkernel_do at line 67\n",
- "\n",
- "Line # Hits Time Per Hit % Time Line Contents\n",
- "==============================================================\n",
- " 67 def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):\n",
- " 68 \"\"\"Calculate marginalized graph kernel between 2 graphs.\n",
- " 69 \n",
- " 70 Parameters\n",
- " 71 ----------\n",
- " 72 G1, G2 : NetworkX graphs\n",
- " 73 2 graphs between which the kernel is calculated.\n",
- " 74 node_label : string\n",
- " 75 node attribute used as label.\n",
- " 76 edge_label : string\n",
- " 77 edge attribute used as label.\n",
- " 78 p_quit : integer\n",
- " 79 the termination probability in the random walks generating step.\n",
- " 80 itr : integer\n",
- " 81 time of iterations to calculate R_inf.\n",
- " 82 \n",
- " 83 Return\n",
- " 84 ------\n",
- " 85 kernel : float\n",
- " 86 Marginalized Kernel between 2 graphs.\n",
- " 87 \"\"\"\n",
- " 88 # init parameters\n",
- " 89 17205 12886.0 0.7 0.0 kernel = 0\n",
- " 90 17205 52542.0 3.1 0.0 num_nodes_G1 = nx.number_of_nodes(G1)\n",
- " 91 17205 28240.0 1.6 0.0 num_nodes_G2 = nx.number_of_nodes(G2)\n",
- " 92 17205 15595.0 0.9 0.0 p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)\n",
- " 93 17205 11587.0 0.7 0.0 p_init_G2 = 1 / num_nodes_G2\n",
- " 94 \n",
- " 95 17205 11663.0 0.7 0.0 q = p_quit * p_quit\n",
- " 96 17205 10728.0 0.6 0.0 r1 = q\n",
- " 97 \n",
- " 98 # initial R_inf\n",
- " 99 17205 38412.0 2.2 0.0 R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes\n",
- " 100 \n",
- " 101 # calculate R_inf with a simple interative method\n",
- " 102 344100 329235.0 1.0 0.0 for i in range(1, itr):\n",
- " 103 326895 900354.0 2.8 0.1 R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])\n",
- " 104 326895 2287346.0 7.0 0.3 R_inf_new.fill(r1)\n",
- " 105 \n",
- " 106 # calculate R_inf for each pair of nodes\n",
- " 107 2653464 3667117.0 1.4 0.4 for node1 in G1.nodes(data = True):\n",
- " 108 2326569 7522840.0 3.2 0.9 neighbor_n1 = G1[node1[0]]\n",
- " 109 2326569 3492118.0 1.5 0.4 p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)\n",
- " 110 24024379 27775021.0 1.2 3.4 for node2 in G2.nodes(data = True):\n",
- " 111 21697810 69471941.0 3.2 8.4 neighbor_n2 = G2[node2[0]]\n",
- " 112 21697810 32446626.0 1.5 3.9 p_trans_n2 = (1 - p_quit) / len(neighbor_n2) \n",
- " 113 \n",
- " 114 59095092 52545370.0 0.9 6.3 for neighbor1 in neighbor_n1:\n",
- " 115 104193150 92513935.0 0.9 11.2 for neighbor2 in neighbor_n2:\n",
- " 116 \n",
- " 117 t = p_trans_n1 * p_trans_n2 * \\\n",
- " 118 66795868 285324518.0 4.3 34.4 deltakernel(G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label]) * \\\n",
- " 119 66795868 137934393.0 2.1 16.6 deltakernel(neighbor_n1[neighbor1][edge_label] == neighbor_n2[neighbor2][edge_label])\n",
- " 120 66795868 106834143.0 1.6 12.9 R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8)\n",
- " 121 \n",
- " 122 326895 1123677.0 3.4 0.1 R_inf[:] = R_inf_new\n",
- " 123 \n",
- " 124 # add elements of R_inf up and calculate kernel\n",
- " 125 139656 330283.0 2.4 0.0 for node1 in G1.nodes(data = True):\n",
- " 126 1264441 1435263.0 1.1 0.2 for node2 in G2.nodes(data = True): \n",
- " 127 1141990 1377134.0 1.2 0.2 s = p_init_G1 * p_init_G2 * deltakernel(node1[1][node_label] == node2[1][node_label])\n",
- " 128 1141990 1375456.0 1.2 0.2 kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)\n",
- " 129 \n",
- " 130 17205 10801.0 0.6 0.0 return kernel"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "scrolled": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "- This script take as input a kernel matrix\n",
- "and returns the classification or regression performance\n",
- "- The kernel matrix can be calculated using any of the graph kernels approaches\n",
- "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n",
- "- For predition we divide the data in training, validation and test. For each split, we first train on the train data, \n",
- "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n",
- "provide the corresponding performance on the test set. If more than one split is performed, the final results \n",
- "correspond to the average of the performances on the test sets. \n",
- "\n",
- "@references\n",
- " https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n",
- "\n",
- "\n",
- " Loading dataset from file...\n",
- "[ -23.7 14. 37.3 109.7 10.8 39. 42. 66.6 135. 148.5\n",
- " 40. 34.6 32. 63. 53.5 67. 64.4 84.7 95.5 92.\n",
- " 84.4 154. 156. 166. 183. 70.3 63.6 52.5 59. 59.5\n",
- " 55.2 88. 83. 104.5 102. 92. 107.4 123.2 112.5 118.5\n",
- " 101.5 173.7 165.5 181. 99.5 92.3 90.1 80.2 82. 91.2\n",
- " 91.5 81.2 93. 69. 86.3 82. 103. 103.5 96. 112. 104.\n",
- " 132.5 123.5 120.3 145. 144.2 142.8 132. 134.2 137. 139.\n",
- " 133.6 120.4 120. 137. 195.8 177.2 181. 185.9 175.7 186. 211.\n",
- " 125. 118. 117.1 107. 102.5 112. 97.4 91.5 87.6 106.5\n",
- " 101. 99.3 90. 137. 114. 126. 124. 140.5 157.5 146. 145.\n",
- " 141. 171. 166. 155. 145. 159. 138. 142. 159. 163.5\n",
- " 229.5 142. 125. 132. 130.5 125. 122. 121. 122.2 112. 106.\n",
- " 114.5 151. 128.5 109.5 126. 147. 158. 147. 165. 188.9\n",
- " 170. 178. 148.5 165. 177. 167. 195. 226. 215. 201. 205.\n",
- " 151.5 165.5 157. 139. 163. 153.5 139. 162. 173. 159.5\n",
- " 159.5 155.5 141. 126. 164. 163. 166.5 146. 165. 159. 195.\n",
- " 218. 250. 235. 186.5 156.5 162. 162. 170.2 173.2 186.8\n",
- " 173. 187. 174. 188.5 199. 228. 215. 216. 240. ]\n",
- "\n",
- " --- This is a regression problem ---\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.1 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 246.21349620819092 seconds ---\n",
- "[[ 0.0287062 0.0124634 0.00444444 ..., 0.00606061 0.00606061\n",
- " 0.00606061]\n",
- " [ 0.0124634 0.01108958 0.00333333 ..., 0.00454545 0.00454545\n",
- " 0.00454545]\n",
- " [ 0.00444444 0.00333333 0.0287062 ..., 0.00819912 0.00819912\n",
- " 0.00975875]\n",
- " ..., \n",
- " [ 0.00606061 0.00454545 0.00819912 ..., 0.02846735 0.02836907\n",
- " 0.02896354]\n",
- " [ 0.00606061 0.00454545 0.00819912 ..., 0.02836907 0.02831424\n",
- " 0.0288712 ]\n",
- " [ 0.00606061 0.00454545 0.00975875 ..., 0.02896354 0.0288712\n",
- " 0.02987915]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on val set: 51.192412\n",
- "With standard deviation: 58.804642\n",
- "\n",
- " Mean performance on test set: 18.518782\n",
- "With standard deviation: 7.749004\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.2 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 240.73209404945374 seconds ---\n",
- "[[ 0.06171557 0.03856471 0.01777778 ..., 0.02424242 0.02424242\n",
- " 0.02424242]\n",
- " [ 0.03856471 0.03579176 0.01333333 ..., 0.01818182 0.01818182\n",
- " 0.01818182]\n",
- " [ 0.01777778 0.01333333 0.06171557 ..., 0.02994207 0.02994207\n",
- " 0.03262072]\n",
- " ..., \n",
- " [ 0.02424242 0.01818182 0.02994207 ..., 0.07442109 0.07434207\n",
- " 0.07383563]\n",
- " [ 0.02424242 0.01818182 0.02994207 ..., 0.07434207 0.07430377\n",
- " 0.07376068]\n",
- " [ 0.02424242 0.01818182 0.03262072 ..., 0.07383563 0.07376068\n",
- " 0.07366354]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on val set: 56.692288\n",
- "With standard deviation: 58.162153\n",
- "\n",
- " Mean performance on test set: 17.899091\n",
- "With standard deviation: 6.591042\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.3 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 244.91414594650269 seconds ---\n",
- "[[ 0.09803909 0.07202114 0.04 ..., 0.05454545 0.05454545\n",
- " 0.05454545]\n",
- " [ 0.07202114 0.06853421 0.03 ..., 0.04090909 0.04090909\n",
- " 0.04090909]\n",
- " [ 0.04 0.03 0.09803909 ..., 0.06368916 0.06368916\n",
- " 0.06678704]\n",
- " ..., \n",
- " [ 0.05454545 0.04090909 0.06368916 ..., 0.12892852 0.12891455\n",
- " 0.12734365]\n",
- " [ 0.05454545 0.04090909 0.06368916 ..., 0.12891455 0.12892664\n",
- " 0.12733207]\n",
- " [ 0.05454545 0.04090909 0.06678704 ..., 0.12734365 0.12733207\n",
- " 0.1261675 ]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on val set: 54.360795\n",
- "With standard deviation: 61.733054\n",
- "\n",
- " Mean performance on test set: 18.392352\n",
- "With standard deviation: 7.101611\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.4 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 246.01012706756592 seconds ---\n",
- "[[ 0.13888889 0.11120616 0.07111111 ..., 0.0969697 0.0969697\n",
- " 0.0969697 ]\n",
- " [ 0.11120616 0.10756609 0.05333333 ..., 0.07272727 0.07272727\n",
- " 0.07272727]\n",
- " [ 0.07111111 0.05333333 0.13888889 ..., 0.10909713 0.10909713\n",
- " 0.11216176]\n",
- " ..., \n",
- " [ 0.0969697 0.07272727 0.10909713 ..., 0.19178929 0.19182091\n",
- " 0.18963212]\n",
- " [ 0.0969697 0.07272727 0.10909713 ..., 0.19182091 0.19186661\n",
- " 0.18966477]\n",
- " [ 0.0969697 0.07272727 0.11216176 ..., 0.18963212 0.18966477\n",
- " 0.18786824]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on val set: 44.518253\n",
- "With standard deviation: 44.478206\n",
- "\n",
- " Mean performance on test set: 19.623259\n",
- "With standard deviation: 6.248069\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.5 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 241.62482810020447 seconds ---\n",
- "[[ 0.18518519 0.15591398 0.11111111 ..., 0.15151515 0.15151515\n",
- " 0.15151515]\n",
- " [ 0.15591398 0.15254237 0.08333333 ..., 0.11363636 0.11363636\n",
- " 0.11363636]\n",
- " [ 0.11111111 0.08333333 0.18518519 ..., 0.16617791 0.16617791\n",
- " 0.16890214]\n",
- " ..., \n",
- " [ 0.15151515 0.11363636 0.16617791 ..., 0.26386999 0.26391515\n",
- " 0.26158184]\n",
- " [ 0.15151515 0.11363636 0.16617791 ..., 0.26391515 0.26396688\n",
- " 0.26162729]\n",
- " [ 0.15151515 0.11363636 0.16890214 ..., 0.26158184 0.26162729\n",
- " 0.25964592]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on val set: 42.848719\n",
- "With standard deviation: 39.189276\n",
- "\n",
- " Mean performance on test set: 19.993624\n",
- "With standard deviation: 6.299511\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.6 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 240.8926112651825 seconds ---\n",
- "[[ 0.23809524 0.20664506 0.16 ..., 0.21818182 0.21818182\n",
- " 0.21818182]\n",
- " [ 0.20664506 0.20385906 0.12 ..., 0.16363636 0.16363636\n",
- " 0.16363636]\n",
- " [ 0.16 0.12 0.23809524 ..., 0.2351024 0.2351024\n",
- " 0.23727718]\n",
- " ..., \n",
- " [ 0.21818182 0.16363636 0.2351024 ..., 0.34658956 0.34662512\n",
- " 0.34454945]\n",
- " [ 0.21818182 0.16363636 0.2351024 ..., 0.34662512 0.34666325\n",
- " 0.34458505]\n",
- " [ 0.21818182 0.16363636 0.23727718 ..., 0.34454945 0.34458505\n",
- " 0.34279503]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on val set: 39.983104\n",
- "With standard deviation: 32.270969\n",
- "\n",
- " Mean performance on test set: 20.546624\n",
- "With standard deviation: 6.261735\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.7 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 240.47843861579895 seconds ---\n",
- "[[ 0.2991453 0.26444601 0.21777778 ..., 0.2969697 0.2969697\n",
- " 0.2969697 ]\n",
- " [ 0.26444601 0.26246188 0.16333333 ..., 0.22272727 0.22272727\n",
- " 0.22272727]\n",
- " [ 0.21777778 0.16333333 0.2991453 ..., 0.31614548 0.31614548\n",
- " 0.31765009]\n",
- " ..., \n",
- " [ 0.2969697 0.22272727 0.31614548 ..., 0.44189997 0.44191814\n",
- " 0.44038348]\n",
- " [ 0.2969697 0.22272727 0.31614548 ..., 0.44191814 0.44193708\n",
- " 0.44040164]\n",
- " [ 0.2969697 0.22272727 0.31765009 ..., 0.44038348 0.44040164\n",
- " 0.43906772]]\n",
- "\n",
- " Saving kernel matrix to file...\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- " Mean performance on val set: 37.530308\n",
- "With standard deviation: 29.730795\n",
- "\n",
- " Mean performance on test set: 21.701779\n",
- "With standard deviation: 6.335305\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.8 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 242.16377139091492 seconds ---\n",
- "[[ 0.37037037 0.33093141 0.28444444 ..., 0.38787879 0.38787879\n",
- " 0.38787879]\n",
- " [ 0.33093141 0.32983023 0.21333333 ..., 0.29090909 0.29090909\n",
- " 0.29090909]\n",
- " [ 0.28444444 0.21333333 0.37037037 ..., 0.4096795 0.4096795\n",
- " 0.41049599]\n",
- " ..., \n",
- " [ 0.38787879 0.29090909 0.4096795 ..., 0.55242487 0.55243009\n",
- " 0.5515636 ]\n",
- " [ 0.38787879 0.29090909 0.4096795 ..., 0.55243009 0.55243545\n",
- " 0.55156881]\n",
- " [ 0.38787879 0.29090909 0.41049599 ..., 0.5515636 0.55156881\n",
- " 0.55081257]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on val set: 37.110483\n",
- "With standard deviation: 21.287120\n",
- "\n",
- " Mean performance on test set: 23.148949\n",
- "With standard deviation: 6.102457\n",
- "\n",
- " --- calculating kernel matrix when termimation probability = 0.9 ---\n",
- "\n",
- " Calculating kernel matrix, this could take a while...\n",
- "\n",
- " --- marginalized kernel matrix of size 185 built in 238.44418454170227 seconds ---\n",
- "[[ 0.45454545 0.40839542 0.36 ..., 0.49090909 0.49090909\n",
- " 0.49090909]\n",
- " [ 0.40839542 0.40805534 0.27 ..., 0.36818182 0.36818182\n",
- " 0.36818182]\n",
- " [ 0.36 0.27 0.45454545 ..., 0.51619708 0.51619708\n",
- " 0.51644564]\n",
- " ..., \n",
- " [ 0.49090909 0.36818182 0.51619708 ..., 0.68172189 0.68172233\n",
- " 0.68145294]\n",
- " [ 0.49090909 0.36818182 0.51619708 ..., 0.68172233 0.68172277\n",
- " 0.68145338]\n",
- " [ 0.49090909 0.36818182 0.51644564 ..., 0.68145294 0.68145338\n",
- " 0.68121781]]\n",
- "\n",
- " Saving kernel matrix to file...\n",
- "\n",
- " Mean performance on val set: 30.572040\n",
- "With standard deviation: 11.057046\n",
- "\n",
- " Mean performance on test set: 24.715650\n",
- "With standard deviation: 4.891587\n",
- "\n",
- "\n",
- " p_quit std RMSE\n",
- "-------- ------- -------\n",
- " 0.1 7.749 18.5188\n",
- " 0.2 6.59104 17.8991\n",
- " 0.3 7.10161 18.3924\n",
- " 0.4 6.24807 19.6233\n",
- " 0.5 6.29951 19.9936\n",
- " 0.6 6.26173 20.5466\n",
- " 0.7 6.33531 21.7018\n",
- " 0.8 6.10246 23.1489\n",
- " 0.9 4.89159 24.7157\n"
- ]
- }
- ],
- "source": [
- "# Author: Elisabetta Ghisu\n",
- "\n",
- "\"\"\"\n",
- "- This script takes as input a kernel matrix\n",
- "and returns the classification or regression performance\n",
- "- The kernel matrix can be calculated using any of the graph kernels approaches\n",
- "- The criteria used for prediction are SVM for classification and kernel Ridge regression for regression\n",
- "- For prediction we divide the data in training, validation and test. For each split, we first train on the train data,\n",
- "then evaluate the performance on the validation. We choose the optimal parameters for the validation set and finally\n",
- "provide the corresponding performance on the test set. If more than one split is performed, the final results\n",
- "correspond to the average of the performances on the test sets.\n",
- "\n",
- "@references\n",
- "    https://github.com/eghisu/GraphKernels/blob/master/GraphKernelsCollection/python_scripts/compute_perf_gk.py\n",
- "\"\"\"\n",
- "\n",
- "print(__doc__)\n",
- "\n",
- "import sys\n",
- "import os\n",
- "import pathlib\n",
- "sys.path.insert(0, \"../\")  # must run before the pygraph imports below so the local package is found\n",
- "from tabulate import tabulate\n",
- "\n",
- "import random\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "from sklearn.kernel_ridge import KernelRidge # 0.17\n",
- "from sklearn.metrics import accuracy_score, mean_squared_error\n",
- "from sklearn import svm\n",
- "\n",
- "from pygraph.kernels.marginalizedKernel import marginalizedkernel\n",
- "from pygraph.utils.graphfiles import loadDataset\n",
- "\n",
- "print('\\n Loading dataset from file...')\n",
- "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
- "y = np.array(y)\n",
- "print(y)\n",
- "\n",
- "# setup the parameters\n",
- "model_type = 'regression'  # Regression or classification problem\n",
- "print('\\n --- This is a %s problem ---' % model_type)\n",
- "\n",
- "datasize = len(dataset)\n",
- "trials = 100  # Number of hyperparameter values tried (size of the grids below)\n",
- "splits = 10  # Number of splits of the data\n",
- "alpha_grid = np.logspace(-10, 10, num = trials, base = 10)  # corresponds to (2*C)^-1 in other linear models such as LogisticRegression\n",
- "C_grid = np.logspace(-10, 10, num = trials, base = 10)\n",
- "random.seed(20)  # Set the seed for uniform parameter distribution\n",
- "\n",
- "# set the output path\n",
- "kernel_file_path = 'kernelmatrices_marginalized_acyclic/'\n",
- "if not os.path.exists(kernel_file_path):\n",
- "    os.makedirs(kernel_file_path)\n",
- "\n",
- "\n",
- "\"\"\"\n",
- "- Here starts the main program\n",
- "- First we permute the data, then for each split we evaluate corresponding performances\n",
- "- In the end, the performances are averaged over the test sets\n",
- "\"\"\"\n",
- "# One entry per p_quit value: mean / std over the splits of the selected val and test RMSE\n",
- "val_means_pquit = []\n",
- "val_stds_pquit = []\n",
- "test_means_pquit = []\n",
- "test_stds_pquit = []\n",
- "\n",
- "\n",
- "for p_quit in np.linspace(0.1, 0.9, 9):\n",
- "    print('\\n --- calculating kernel matrix when termimation probability = %.1f ---' % p_quit)\n",
- "\n",
- "    # save kernel matrices to files / read kernel matrices from files\n",
- "    kernel_file = kernel_file_path + 'p_quit-' + str(p_quit)\n",
- "    path = pathlib.Path(kernel_file)\n",
- "    # get the kernel matrix of the whole dataset (cached on disk once computed)\n",
- "    if path.is_file():\n",
- "        print('\\n Loading the kernel matrix from file...')\n",
- "        Kmatrix = np.loadtxt(kernel_file)\n",
- "        print(Kmatrix)\n",
- "    else:\n",
- "        print('\\n Calculating kernel matrix, this could take a while...')\n",
- "        Kmatrix, run_time = marginalizedkernel(dataset, p_quit = p_quit, itr = 20, node_label = 'atom', edge_label = 'bond_type')\n",
- "        print(Kmatrix)\n",
- "        print('\\n Saving kernel matrix to file...')\n",
- "        np.savetxt(kernel_file, Kmatrix)\n",
- "\n",
- "    # Selected validation performance and the corresponding test performance, one entry per split\n",
- "    val_split = []\n",
- "    test_split = []\n",
- "\n",
- "    # For each split of the data\n",
- "    for j in range(10, 10 + splits):\n",
- "        # print('\\n Starting split %d...' % j)\n",
- "\n",
- "        # Set the random seed for data permutation (the split index, so splits are reproducible)\n",
- "        random_state = int(j)\n",
- "        np.random.seed(random_state)\n",
- "        idx_perm = np.random.permutation(datasize)\n",
- "        # print(idx_perm)\n",
- "\n",
- "        # Permute the data\n",
- "        y_perm = y[idx_perm]  # targets permutation\n",
- "        # print(y_perm)\n",
- "        # apply the same permutation to columns and then rows of the kernel matrix\n",
- "        Kmatrix_perm = Kmatrix[:, idx_perm]  # inputs permutation\n",
- "        # print(Kmatrix_perm)\n",
- "        Kmatrix_perm = Kmatrix_perm[idx_perm, :]  # inputs permutation\n",
- "\n",
- "        # Set the training, validation and test\n",
- "        # Note: the percentage can be set up by the user\n",
- "        num_train_val = int((datasize * 90) / 100)  # 90% (of entire dataset) for training and validation\n",
- "        num_test = datasize - num_train_val  # 10% (of entire dataset) for test\n",
- "        num_train = int((num_train_val * 90) / 100)  # 90% (of train + val) for training\n",
- "        num_val = num_train_val - num_train  # 10% (of train + val) for validation\n",
- "\n",
- "        # Split the kernel matrix: rows are the samples to predict, columns are always the training samples\n",
- "        Kmatrix_train = Kmatrix_perm[0:num_train, 0:num_train]\n",
- "        Kmatrix_val = Kmatrix_perm[num_train:(num_train + num_val), 0:num_train]\n",
- "        Kmatrix_test = Kmatrix_perm[(num_train + num_val):datasize, 0:num_train]\n",
- "\n",
- "        # Split the targets\n",
- "        y_train = y_perm[0:num_train]\n",
- "\n",
- "        # Normalization step (for real valued targets only)\n",
- "        # Statistics come from the training targets only; predictions are mapped back below.\n",
- "        if model_type == 'regression':\n",
- "            # print('\\n Normalizing output y...')\n",
- "            y_train_mean = np.mean(y_train)\n",
- "            y_train_std = np.std(y_train)\n",
- "            y_train = (y_train - y_train_mean) / float(y_train_std)\n",
- "            # print(y)\n",
- "\n",
- "        y_val = y_perm[num_train:(num_train + num_val)]\n",
- "        y_test = y_perm[(num_train + num_val):datasize]\n",
- "\n",
- "        # Record the performance for each parameter trial respectively on validation and test set\n",
- "        perf_all_val = []\n",
- "        perf_all_test = []\n",
- "\n",
- "        # For each parameter trial\n",
- "        for i in range(trials):\n",
- "            # For regression use the Kernel Ridge method\n",
- "            if model_type == 'regression':\n",
- "                # print('\\n Starting experiment for trial %d and parameter alpha = %3f\\n ' % (i, alpha_grid[i]))\n",
- "\n",
- "                # Fit the kernel ridge model\n",
- "                KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])\n",
- "                # KR = svm.SVR(kernel = 'precomputed', C = C_grid[i])\n",
- "                KR.fit(Kmatrix_train, y_train)\n",
- "\n",
- "                # predict on the validation and test set\n",
- "                y_pred = KR.predict(Kmatrix_val)\n",
- "                y_pred_test = KR.predict(Kmatrix_test)\n",
- "                # print(y_pred)\n",
- "\n",
- "                # adjust prediction: needed because the training targets have been normalized\n",
- "                y_pred = y_pred * float(y_train_std) + y_train_mean\n",
- "                # print(y_pred)\n",
- "                y_pred_test = y_pred_test * float(y_train_std) + y_train_mean\n",
- "                # print(y_pred_test)\n",
- "\n",
- "                # root mean squared error on validation\n",
- "                rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n",
- "                perf_all_val.append(rmse)\n",
- "\n",
- "                # root mean squared error on test\n",
- "                rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n",
- "                perf_all_test.append(rmse_test)\n",
- "\n",
- "                # print('The performance on the validation set is: %3f' % rmse)\n",
- "                # print('The performance on the test set is: %3f' % rmse_test)\n",
- "\n",
- "        # --- FIND THE OPTIMAL PARAMETERS --- #\n",
- "        # For regression: minimise the root mean squared error\n",
- "        if model_type == 'regression':\n",
- "\n",
- "            # get optimal parameter on validation (argmin of the validation RMSE).\n",
- "            # The selection must use the validation errors only: taking the argmin over\n",
- "            # the test errors would leak test data into model selection and make the\n",
- "            # reported test RMSE optimistically biased.\n",
- "            min_idx = np.argmin(perf_all_val)\n",
- "            alpha_opt = alpha_grid[min_idx]\n",
- "\n",
- "            # performance corresponding to optimal parameter on val\n",
- "            perf_val_opt = perf_all_val[min_idx]\n",
- "\n",
- "            # corresponding performance on test for the same parameter\n",
- "            perf_test_opt = perf_all_test[min_idx]\n",
- "\n",
- "            # print('The best performance is for trial %d with parameter alpha = %3f' % (min_idx, alpha_opt))\n",
- "            # print('The best performance on the validation set is: %3f' % perf_val_opt)\n",
- "            # print('The corresponding performance on test set is: %3f' % perf_test_opt)\n",
- "\n",
- "        # append the best performance on validation\n",
- "        # at the current split\n",
- "        val_split.append(perf_val_opt)\n",
- "\n",
- "        # append the corresponding performance on the test set\n",
- "        test_split.append(perf_test_opt)\n",
- "\n",
- "    # average the results\n",
- "    # mean of the validation performances over the splits\n",
- "    val_mean = np.mean(np.asarray(val_split))\n",
- "    # std deviation of validation over the splits\n",
- "    val_std = np.std(np.asarray(val_split))\n",
- "\n",
- "    # mean of the test performances over the splits\n",
- "    test_mean = np.mean(np.asarray(test_split))\n",
- "    # std deviation of the test over the splits\n",
- "    test_std = np.std(np.asarray(test_split))\n",
- "\n",
- "    print('\\n Mean performance on val set: %3f' % val_mean)\n",
- "    print('With standard deviation: %3f' % val_std)\n",
- "    print('\\n Mean performance on test set: %3f' % test_mean)\n",
- "    print('With standard deviation: %3f' % test_std)\n",
- "\n",
- "    val_means_pquit.append(val_mean)\n",
- "    val_stds_pquit.append(val_std)\n",
- "    test_means_pquit.append(test_mean)\n",
- "    test_stds_pquit.append(test_std)\n",
- "\n",
- "print('\\n')\n",
- "print(tabulate({'p_quit': np.linspace(0.1, 0.9, 9), 'RMSE': test_means_pquit, 'std': test_stds_pquit}, headers='keys'))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|