{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "--- This is a regression problem ---\n", "\n", "1. Loading dataset from file...\n", "\n", "2. Calculating gram matrices. This could take a while...\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 183 built in 0.35580015182495117 seconds ---\n", "\n", "gram matrix with parameters {'base_kernel': 'subtree', 'height': 0} is: \n", "[[ 5. 6. 4. ... 20. 20. 20.]\n", " [ 6. 8. 4. ... 20. 20. 20.]\n", " [ 4. 4. 5. ... 21. 21. 21.]\n", " ...\n", " [ 20. 20. 21. ... 101. 101. 101.]\n", " [ 20. 20. 21. ... 101. 101. 101.]\n", " [ 20. 20. 21. ... 101. 101. 101.]]\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 183 built in 0.812713623046875 seconds ---\n", "\n", "gram matrix with parameters {'base_kernel': 'subtree', 'height': 1} is: \n", "[[ 10. 10. 4. ... 20. 20. 20.]\n", " [ 10. 16. 4. ... 20. 20. 20.]\n", " [ 4. 4. 10. ... 22. 22. 24.]\n", " ...\n", " [ 20. 20. 22. ... 130. 130. 122.]\n", " [ 20. 20. 22. ... 130. 130. 122.]\n", " [ 20. 20. 24. ... 122. 122. 154.]]\n", "\n", " --- Weisfeiler-Lehman subtree kernel matrix of size 183 built in 1.3875529766082764 seconds ---\n", "\n", "gram matrix with parameters {'base_kernel': 'subtree', 'height': 2} is: \n", "[[ 15. 4. 0. ... 0. 0. 0.]\n", " [ 4. 24. 0. ... 0. 0. 0.]\n", " [ 0. 0. 15. ... 1. 1. 5.]\n", " ...\n", " [ 0. 0. 1. ... 87. 58. 23.]\n", " [ 0. 0. 1. ... 58. 67. 23.]\n", " [ 0. 0. 5. ... 23. 23. 101.]]\n", "\n", "3. Fitting and predicting using nested cross validation. This could really take a while...\n", "cross validation: 0%| | 0/2 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn.datasets import load_iris\n", "from matplotlib import pyplot as plt\n", "from sklearn.kernel_ridge import KernelRidge\n", "from sklearn.svm import SVC\n", "from sklearn.metrics import accuracy_score, mean_squared_error\n", "from sklearn.model_selection import GridSearchCV, cross_val_score, KFold, train_test_split, ParameterGrid\n", "import numpy as np\n", "\n", "import sys\n", "sys.path.insert(0, \"../\")\n", "\n", "from pygraph.utils.graphfiles import loadDataset\n", "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel\n", "\n", "print(__doc__)\n", "\n", "\n", "\n", "# Load the dataset\n", "iris = load_iris()\n", "X_iris = iris.data\n", "y_iris = iris.target\n", "\n", "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", "dataset, y = loadDataset(datafile)\n", "\n", "# Set up possible values of parameters to optimize over\n", "p_grid = {\"C\": [1, 10, 100],\n", " \"gamma\": [.01, .1]}\n", "\n", "estimator = weisfeilerlehmankernel\n", "param_grid_precomputed = {'height': [0,1,2,3], 'base_kernel': ['subtree']} # param grid for precomputed kernel gram matrix\n", "# param_grid = {\"C\": [1, 10, 100]}\n", "param_grid = {\"alpha\": np.logspace(-5, 5, num = 21, base = 10)}\n", "NUM_TRIALS = 3 # Number of random trials\n", "\n", "\n", "# We will use a Support Vector Classifier with \"rbf\" kernel\n", "# svm = SVC(kernel=\"precomputed\")\n", "\n", "best_params = []\n", "gram_matrices = []\n", "run_time = []\n", "\n", "param_list_precomputed = list(ParameterGrid(param_grid_precomputed))\n", "# print('param_list_precomputed: ', param_list_precomputed)\n", "param_list = list(ParameterGrid(param_grid))\n", "# print('param_list: ', param_list)\n", "\n", "# Arrays to store scores\n", "train_pref = np.zeros((NUM_TRIALS, len(param_list_precomputed), len(param_list)))\n", "val_pref = np.zeros((NUM_TRIALS, len(param_list_precomputed), len(param_list)))\n", "test_pref = np.zeros((NUM_TRIALS, len(param_list_precomputed), len(param_list)))\n", "\n", "# calculate all gram matrices\n", "for params_out in param_list_precomputed:\n", "# print('params_out: ', params_out)\n", " Kmatrix, current_run_time = estimator(dataset, **params_out)\n", "# print('gram matrix: ', Kmatrix)\n", " import matplotlib.pyplot as plt\n", " plt.matshow(Kmatrix)\n", " gram_matrices.append(Kmatrix)\n", " run_time.append(current_run_time)\n", "\n", "# Loop for each trial\n", "for trial in range(NUM_TRIALS): # Test set level\n", " print()\n", " print('trial: ', trial)\n", " \n", " for index_out, params_out in enumerate(param_list_precomputed):\n", "# print()\n", "# print('index_out: ', index_out) \n", "# print('params_out: ', params_out) \n", " X_app,X_test,y_app,y_test = train_test_split(gram_matrices[index_out], y, test_size=0.1)\n", " split_index_app = [y.index(y_i) for y_i in y_app if y_i in y]\n", " split_index_test = [y.index(y_i) for y_i in y_test if y_i in y]\n", " X_app = X_app[:,split_index_app]\n", " X_test = X_test[:,split_index_app] \n", " y_app = np.array(y_app)\n", " y_test = np.array(y_test)\n", "# print('split_index_app: ', split_index_app)\n", "# print('split_index_test: ', split_index_test)\n", "# print('X_app: ', X_app)\n", "# print('y_app: ', y_app)\n", "# print('X_app_shape: ', X_app.shape)\n", "\n", " for index_in, params_in in enumerate(param_list):\n", "# print()\n", "# print('params_in: ', params_in)\n", " inner_cv = KFold(n_splits=10, shuffle=True, random_state=trial)\n", " KR = KernelRidge(kernel = 'precomputed', **params_in)\n", " current_train_perf = []\n", " current_valid_perf = []\n", " current_test_perf = []\n", "# print('split_y_app: ', list(inner_cv.split(y_app)))\n", " \n", " for train_index, valid_index in inner_cv.split(X_app): # validation set level\n", "# print()\n", "# print('train_index: ', train_index)\n", "# print('train_index_shape: ', train_index.shape)\n", "# print('valid_index: ', valid_index)\n", "# print('valid_index_shape: ', valid_index.shape)\n", " KR.fit(X_app[train_index,:][:,train_index], y_app[train_index])\n", " \n", " # predict on the train and test set\n", " y_pred_train = KR.predict(X_app[train_index,:][:,train_index])\n", " y_pred_valid = KR.predict(X_app[valid_index,:][:,train_index])\n", " y_pred_test = KR.predict(X_test[:,train_index])\n", "\n", " # root mean squared errors\n", " current_train_perf.append(np.sqrt(mean_squared_error(y_app[train_index], y_pred_train)))\n", " current_valid_perf.append(np.sqrt(mean_squared_error(y_app[valid_index], y_pred_valid)))\n", " current_test_perf.append(np.sqrt(mean_squared_error(y_test, y_pred_test)))\n", "# print('y_test: ', y_test)\n", "# print('y_pred_test: ', y_pred_test)\n", " \n", "# print('current_train_perf: ', current_train_perf)\n", "# print('current_valid_perf: ', current_valid_perf)\n", "# print('current_test_perf: ', current_test_perf)\n", " \n", " train_pref[trial][index_out][index_in] = np.mean(current_train_perf)\n", " val_pref[trial][index_out][index_in] = np.mean(current_valid_perf)\n", " test_pref[trial][index_out][index_in] = np.mean(current_test_perf)\n", " \n", "print('train_pref: ', train_pref)\n", "print('val_pref: ', val_pref)\n", "print('test_pref: ', test_pref)\n", "\n", "average_train_scores = np.mean(train_pref, axis=0)\n", "average_val_scores = np.mean(val_pref, axis=0)\n", "average_perf_scores = np.mean(test_pref, axis=0)\n", "std_train_scores = np.std(train_pref, axis=0, ddof=1) # sample std is used here\n", "std_val_scores = np.std(val_pref, axis=0, ddof=1)\n", "std_perf_scores = np.std(test_pref, axis=0, ddof=1)\n", "# print('average_train_scores: ', average_train_scores)\n", "# print('average_val_scores: ', average_val_scores)\n", "# print('average_perf_scores: ', average_perf_scores)\n", "# print('std_train_scores: ', std_train_scores)\n", "# print('std_val_scores: ', std_val_scores)\n", "# print('std_perf_scores: ', std_perf_scores)\n", "best_val_perf = np.amin(average_val_scores)\n", "print('best_val_perf: ', best_val_perf)\n", "best_params_index = np.where(average_val_scores == best_val_perf)\n", "best_params_out = [param_list_precomputed[i] for i in best_params_index[0]]\n", "best_params_in = [param_list[i] for i in best_params_index[1]]\n", "print('best_params_index: ', best_params_index)\n", "print('best_params_out: ', best_params_out)\n", "print('best_params_in: ', best_params_in)\n", "# below: only find one performance; muitiple pref might exist\n", "best_val_std = std_val_scores[best_params_index[0][0]][best_params_index[1][0]]\n", "print('best_val_std: ', best_val_std)\n", "\n", "final_performance = average_perf_scores[best_params_index[0][0]][best_params_index[1][0]]\n", "final_confidence = std_perf_scores[best_params_index[0][0]][best_params_index[1][0]]\n", "print('final_performance: ', final_performance)\n", "print('final_confidence: ', final_confidence)\n", "train_performance = average_train_scores[best_params_index[0][0]][best_params_index[1][0]]\n", "train_std = std_train_scores[best_params_index[0][0]][best_params_index[1][0]]\n", "print('train_performance: ', train_performance)\n", "print('train_std: ', train_std)\n", "\n", "best_run_time = run_time[best_params_index[0][0]]\n", "print('run_time: ', run_time)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'o1': 1, 'o2': 2}, {'o1': 3, 'o2': 4}]\n", "[{'i2': 6, 'i1': 5}, {'i2': 8, 'i1': 7}, {'i2': 10, 'i1': 9}]\n" ] }, { "data": { "text/plain": [ "[({'o1': 1, 'o2': 2}, {'o1': 3, 'o2': 4}),\n", " ({'o1': 3, 'o2': 4}, {'o1': 1, 'o2': 2})]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = [ {'o1':1,'o2':2},{'o1':3,'o2':4}]\n", "print(x)\n", "y = [ {'i1':5,'i2':6},{'i1':7,'i2':8},{'i1':9,'i2':10}]\n", "print(y)\n", "from itertools import permutations\n", "[item for item in permutations(x)]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Automatically created module for IPython interactive environment\n", "\n", "0\n", "{'std_score_time': array([3.69876958e-05, 1.59321777e-05, 2.74488935e-05, 1.14532461e-05,\n", " 1.88863033e-06, 2.37223491e-06]), 'split2_train_score': array([0.91666667, 0.96428571, 0.96428571, 0.96428571, 0.97619048,\n", " 0.97619048]), 'split0_test_score': array([0.92857143, 0.96428571, 0.96428571, 0.96428571, 0.96428571,\n", " 0.96428571]), 'split1_train_score': array([0.86904762, 0.96428571, 0.96428571, 0.97619048, 0.97619048,\n", " 0.97619048]), 'split2_test_score': array([0.96428571, 0.96428571, 0.96428571, 1. , 1. ,\n", " 1. ]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00021189, 0.0001657 , 0.00017828, 0.00015265, 0.00014174,\n", " 0.00014162]), 'mean_train_score': array([0.9077381 , 0.9702381 , 0.97321429, 0.98214286, 0.98511905,\n", " 0.98511905]), 'mean_fit_time': array([0.00046653, 0.0002929 , 0.00035262, 0.00029558, 0.00025451,\n", " 0.00026101]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.05646924, 0.04639422, 0.03092948, 0.01785714, 0.01546474,\n", " 0.05854856]), 'std_train_score': array([0.03705327, 0.01030983, 0.01546474, 0.01330993, 0.00987091,\n", " 0.00987091]), 'mean_test_score': array([0.89285714, 0.9375 , 0.94642857, 0.98214286, 0.97321429,\n", " 0.95535714]), 'split3_test_score': array([0.82142857, 0.85714286, 0.89285714, 0.96428571, 0.96428571,\n", " 0.85714286]), 'rank_test_score': array([6, 5, 4, 1, 2, 3], dtype=int32), 'split1_test_score': array([0.85714286, 0.96428571, 0.96428571, 1. , 0.96428571,\n", " 1. ]), 'std_fit_time': array([9.01377338e-05, 6.28426330e-06, 3.60406571e-05, 5.15247249e-05,\n", " 1.17498278e-05, 1.70351493e-05]), 'split3_train_score': array([0.96428571, 0.98809524, 1. , 1. , 1. ,\n", " 1. ]), 'split0_train_score': array([0.88095238, 0.96428571, 0.96428571, 0.98809524, 0.98809524,\n", " 0.98809524]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "1\n", "{'std_score_time': array([8.33272081e-06, 2.06132131e-06, 1.89343984e-05, 5.24080428e-06,\n", " 6.61047489e-07, 1.83423550e-06]), 'split2_train_score': array([0.91666667, 0.96428571, 0.96428571, 0.98809524, 0.97619048,\n", " 0.98809524]), 'split0_test_score': array([0.92857143, 0.96428571, 1. , 1. , 1. ,\n", " 0.96428571]), 'split1_train_score': array([0.95238095, 0.97619048, 0.97619048, 0.98809524, 0.98809524,\n", " 0.98809524]), 'split2_test_score': array([1. , 1. , 1. , 1. , 1. ,\n", " 0.96428571]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00018692, 0.00016367, 0.00017142, 0.00014895, 0.0001412 ,\n", " 0.00014251]), 'mean_train_score': array([0.93154762, 0.97321429, 0.9702381 , 0.99107143, 0.98809524,\n", " 0.98809524]), 'mean_fit_time': array([0.00044656, 0.00030571, 0.00029963, 0.00027484, 0.00025165,\n", " 0.00027138]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.06376275, 0.01546474, 0.01785714, 0.02961272, 0.02961272,\n", " 0. ]), 'std_train_score': array([0.01297291, 0.00987091, 0.00595238, 0.00515491, 0.00841794,\n", " 0.00841794]), 'mean_test_score': array([0.91964286, 0.97321429, 0.98214286, 0.97321429, 0.97321429,\n", " 0.96428571]), 'split3_test_score': array([0.92857143, 0.96428571, 0.96428571, 0.96428571, 0.96428571,\n", " 0.96428571]), 'rank_test_score': array([6, 2, 1, 2, 2, 5], dtype=int32), 'split1_test_score': array([0.82142857, 0.96428571, 0.96428571, 0.92857143, 0.92857143,\n", " 0.96428571]), 'std_fit_time': array([9.50373959e-05, 3.66022995e-06, 1.56844048e-05, 1.16622698e-05,\n", " 8.33527856e-06, 1.73377812e-05]), 'split3_train_score': array([0.92857143, 0.96428571, 0.96428571, 1. , 1. ,\n", " 1. ]), 'split0_train_score': array([0.92857143, 0.98809524, 0.97619048, 0.98809524, 0.98809524,\n", " 0.97619048]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "2\n", "{'std_score_time': array([3.39013698e-06, 2.39236739e-06, 1.81965083e-06, 2.35117470e-06,\n", " 1.03924249e-06, 4.57831964e-07]), 'split2_train_score': array([0.92857143, 0.97619048, 0.97619048, 0.98809524, 0.98809524,\n", " 1. ]), 'split0_test_score': array([1., 1., 1., 1., 1., 1.]), 'split1_train_score': array([0.94047619, 1. , 1. , 1. , 1. ,\n", " 1. ]), 'split2_test_score': array([0.96428571, 1. , 1. , 1. , 1. ,\n", " 0.89285714]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00017661, 0.00015908, 0.00015056, 0.00014818, 0.00014067,\n", " 0.00014156]), 'mean_train_score': array([0.90178571, 0.98511905, 0.98511905, 0.98809524, 0.98809524,\n", " 0.99404762]), 'mean_fit_time': array([0.00038451, 0.00029421, 0.00026572, 0.00026315, 0.00024247,\n", " 0.00025213]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.09902265, 0.01785714, 0.01785714, 0.04374089, 0.04374089,\n", " 0.05646924]), 'std_train_score': array([0.06033671, 0.00987091, 0.00987091, 0.00841794, 0.00841794,\n", " 0.01030983]), 'mean_test_score': array([0.88392857, 0.98214286, 0.98214286, 0.96428571, 0.96428571,\n", " 0.92857143]), 'split3_test_score': array([0.78571429, 0.96428571, 0.96428571, 0.96428571, 0.96428571,\n", " 0.96428571]), 'rank_test_score': array([6, 1, 1, 3, 3, 5], dtype=int32), 'split1_test_score': array([0.78571429, 0.96428571, 0.96428571, 0.89285714, 0.89285714,\n", " 0.85714286]), 'std_fit_time': array([4.54199517e-05, 3.46733083e-06, 4.18932042e-06, 3.81609399e-06,\n", " 4.63232022e-06, 5.49527672e-06]), 'split3_train_score': array([0.79761905, 0.98809524, 0.98809524, 0.98809524, 0.98809524,\n", " 1. ]), 'split0_train_score': array([0.94047619, 0.97619048, 0.97619048, 0.97619048, 0.97619048,\n", " 0.97619048]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "3\n", "{'std_score_time': array([1.44149616e-05, 1.05787750e-06, 8.49235466e-07, 4.57831964e-07,\n", " 8.76005796e-07, 9.88431212e-07]), 'split2_train_score': array([0.89285714, 0.96428571, 0.95238095, 0.97619048, 0.96428571,\n", " 0.96428571]), 'split0_test_score': array([0.85714286, 0.96428571, 0.96428571, 0.96428571, 0.96428571,\n", " 0.96428571]), 'split1_train_score': array([0.94047619, 0.98809524, 0.98809524, 0.98809524, 0.98809524,\n", " 1. ]), 'split2_test_score': array([0.85714286, 0.89285714, 0.89285714, 0.92857143, 0.92857143,\n", " 0.92857143]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00018787, 0.00016254, 0.00016016, 0.00015181, 0.00014877,\n", " 0.00014991]), 'mean_train_score': array([0.9047619 , 0.96428571, 0.96130952, 0.97619048, 0.97321429,\n", " 0.97321429]), 'mean_fit_time': array([0.00039715, 0.00030398, 0.00029951, 0.00027311, 0.00025588,\n", " 0.00027567]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.04374089, 0.03992979, 0.03992979, 0.02525381, 0.01546474,\n", " 0.01785714]), 'std_train_score': array([0.02227177, 0.0145803 , 0.01546474, 0.00841794, 0.00987091,\n", " 0.01760738]), 'mean_test_score': array([0.89285714, 0.94642857, 0.94642857, 0.96428571, 0.95535714,\n", " 0.94642857]), 'split3_test_score': array([0.96428571, 1. , 1. , 1. , 0.96428571,\n", " 0.96428571]), 'rank_test_score': array([6, 3, 3, 1, 2, 3], dtype=int32), 'split1_test_score': array([0.89285714, 0.92857143, 0.92857143, 0.96428571, 0.96428571,\n", " 0.92857143]), 'std_fit_time': array([5.56155415e-05, 1.26754851e-05, 1.82599488e-05, 2.66826502e-06,\n", " 3.90808343e-06, 3.21367993e-06]), 'split3_train_score': array([0.9047619 , 0.95238095, 0.95238095, 0.96428571, 0.97619048,\n", " 0.97619048]), 'split0_train_score': array([0.88095238, 0.95238095, 0.95238095, 0.97619048, 0.96428571,\n", " 0.95238095]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "4\n", "{'std_score_time': array([3.84623189e-06, 2.97307108e-06, 9.29144458e-07, 8.93115488e-06,\n", " 1.71796883e-05, 3.76084054e-05]), 'split2_train_score': array([0.9047619 , 0.94047619, 0.94047619, 0.96428571, 0.96428571,\n", " 0.96428571]), 'split0_test_score': array([0.82142857, 1. , 0.96428571, 1. , 0.96428571,\n", " 0.92857143]), 'split1_train_score': array([0.89285714, 0.94047619, 0.94047619, 0.97619048, 0.97619048,\n", " 0.97619048]), 'split2_test_score': array([0.89285714, 0.96428571, 0.96428571, 0.96428571, 1. ,\n", " 1. ]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00019467, 0.0001688 , 0.00015813, 0.00016057, 0.00016457,\n", " 0.00017679]), 'mean_train_score': array([0.90178571, 0.95535714, 0.95833333, 0.97916667, 0.97916667,\n", " 0.97619048]), 'mean_fit_time': array([0.0004366 , 0.00031209, 0.00028533, 0.0002737 , 0.00026488,\n", " 0.0002774 ]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.04374089, 0.03891874, 0.03092948, 0.04374089, 0.04374089,\n", " 0.02961272]), 'std_train_score': array([0.00515491, 0.01546474, 0.01785714, 0.01297291, 0.01297291,\n", " 0.00841794]), 'mean_test_score': array([0.89285714, 0.95535714, 0.94642857, 0.96428571, 0.96428571,\n", " 0.95535714]), 'split3_test_score': array([0.92857143, 0.89285714, 0.89285714, 0.89285714, 0.89285714,\n", " 0.92857143]), 'rank_test_score': array([6, 3, 5, 1, 1, 3], dtype=int32), 'split1_test_score': array([0.92857143, 0.96428571, 0.96428571, 1. , 1. ,\n", " 0.96428571]), 'std_fit_time': array([6.65989292e-05, 7.14460567e-06, 5.84338354e-06, 4.58917059e-06,\n", " 8.89687927e-06, 2.05769875e-05]), 'split3_train_score': array([0.9047619 , 0.96428571, 0.97619048, 1. , 1. ,\n", " 0.98809524]), 'split0_train_score': array([0.9047619 , 0.97619048, 0.97619048, 0.97619048, 0.97619048,\n", " 0.97619048]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "5\n", "{'std_score_time': array([1.85062584e-06, 1.10550147e-06, 4.28162198e-05, 1.62142164e-06,\n", " 9.29144458e-07, 1.37349589e-06]), 'split2_train_score': array([0.98809524, 0.98809524, 0.98809524, 0.98809524, 0.98809524,\n", " 0.98809524]), 'split0_test_score': array([0.96428571, 0.96428571, 0.96428571, 1. , 1. ,\n", " 0.96428571]), 'split1_train_score': array([0.92857143, 0.96428571, 0.96428571, 0.97619048, 0.97619048,\n", " 0.97619048]), 'split2_test_score': array([0.82142857, 0.96428571, 0.96428571, 0.96428571, 0.96428571,\n", " 0.96428571]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00018179, 0.00016308, 0.00018311, 0.00015223, 0.00014824,\n", " 0.00015038]), 'mean_train_score': array([0.9375 , 0.97916667, 0.98214286, 0.98511905, 0.98511905,\n", " 0.98214286]), 'mean_fit_time': array([0.00041962, 0.00029922, 0.00027883, 0.00028938, 0.00025576,\n", " 0.00028121]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.08134316, 0.03891874, 0.03891874, 0.02961272, 0.02961272,\n", " 0.02525381]), 'std_train_score': array([0.03078595, 0.00987091, 0.01030983, 0.00515491, 0.00515491,\n", " 0.00595238]), 'mean_test_score': array([0.88392857, 0.95535714, 0.95535714, 0.97321429, 0.97321429,\n", " 0.96428571]), 'split3_test_score': array([0.96428571, 1. , 1. , 1. , 1. ,\n", " 1. ]), 'rank_test_score': array([6, 4, 4, 1, 1, 3], dtype=int32), 'split1_test_score': array([0.78571429, 0.89285714, 0.89285714, 0.92857143, 0.92857143,\n", " 0.92857143]), 'std_fit_time': array([7.58356620e-05, 7.24925794e-06, 4.00550309e-06, 2.77644424e-05,\n", " 4.33887425e-06, 1.71334067e-05]), 'split3_train_score': array([0.92857143, 0.97619048, 0.98809524, 0.98809524, 0.98809524,\n", " 0.97619048]), 'split0_train_score': array([0.9047619 , 0.98809524, 0.98809524, 0.98809524, 0.98809524,\n", " 0.98809524]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "6\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "{'std_score_time': array([3.09198197e-06, 1.37349589e-06, 2.29303647e-06, 9.31054315e-07,\n", " 6.05123955e-05, 1.57039859e-05]), 'split2_train_score': array([0.89285714, 0.95238095, 0.96428571, 0.96428571, 0.96428571,\n", " 0.96428571]), 'split0_test_score': array([0.92857143, 0.89285714, 0.89285714, 0.92857143, 0.92857143,\n", " 0.92857143]), 'split1_train_score': array([0.92857143, 0.96428571, 0.96428571, 0.98809524, 0.97619048,\n", " 0.98809524]), 'split2_test_score': array([0.96428571, 1. , 1. , 1. , 1. ,\n", " 0.96428571]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00017935, 0.00016373, 0.00015998, 0.00015104, 0.00022823,\n", " 0.00016546]), 'mean_train_score': array([0.91071429, 0.9702381 , 0.97619048, 0.97916667, 0.97619048,\n", " 0.98214286]), 'mean_fit_time': array([0.00038064, 0.00030971, 0.00028497, 0.0002746 , 0.00040722,\n", " 0.00030589]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.05282214, 0.05357143, 0.04639422, 0.02961272, 0.02961272,\n", " 0.02525381]), 'std_train_score': array([0.01785714, 0.01785714, 0.0145803 , 0.00987091, 0.00841794,\n", " 0.01030983]), 'mean_test_score': array([0.9375 , 0.94642857, 0.95535714, 0.97321429, 0.97321429,\n", " 0.96428571]), 'split3_test_score': array([1., 1., 1., 1., 1., 1.]), 'rank_test_score': array([6, 5, 4, 1, 1, 3], dtype=int32), 'split1_test_score': array([0.85714286, 0.89285714, 0.92857143, 0.96428571, 0.96428571,\n", " 0.96428571]), 'std_fit_time': array([2.67222985e-05, 9.33340985e-06, 8.74767969e-06, 9.21446169e-06,\n", " 5.52532164e-05, 1.68831669e-05]), 'split3_train_score': array([0.89285714, 0.96428571, 0.97619048, 0.97619048, 0.97619048,\n", " 0.98809524]), 'split0_train_score': array([0.92857143, 1. , 1. , 0.98809524, 0.98809524,\n", " 0.98809524]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "7\n", "{'std_score_time': array([1.38380370e-06, 1.73672634e-05, 1.37349589e-06, 6.54322188e-06,\n", " 1.01152436e-06, 8.31870747e-05]), 'split2_train_score': array([0.88095238, 0.98809524, 0.97619048, 0.98809524, 0.97619048,\n", " 0.98809524]), 'split0_test_score': array([0.78571429, 1. , 1. , 1. , 1. ,\n", " 1. ]), 'split1_train_score': array([0.94047619, 0.92857143, 0.95238095, 0.98809524, 0.98809524,\n", " 0.98809524]), 'split2_test_score': array([0.89285714, 0.96428571, 0.96428571, 1. , 1. ,\n", " 1. ]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00018364, 0.00017959, 0.00016695, 0.0001623 , 0.0001545 ,\n", " 0.00025201]), 'mean_train_score': array([0.89583333, 0.9672619 , 0.9702381 , 0.98809524, 0.98511905,\n", " 0.99107143]), 'mean_fit_time': array([0.00041223, 0.00032604, 0.000305 , 0.00029951, 0.00030398,\n", " 0.00040394]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.05357143, 0.02525381, 0.02961272, 0.02961272, 0.02961272,\n", " 0.01785714]), 'std_train_score': array([0.05670404, 0.02436117, 0.01330993, 0. , 0.00515491,\n", " 0.00515491]), 'mean_test_score': array([0.875 , 0.96428571, 0.97321429, 0.97321429, 0.97321429,\n", " 0.98214286]), 'split3_test_score': array([0.89285714, 0.92857143, 0.92857143, 0.92857143, 0.92857143,\n", " 0.96428571]), 'rank_test_score': array([6, 5, 2, 2, 2, 1], dtype=int32), 'split1_test_score': array([0.92857143, 0.96428571, 1. , 0.96428571, 0.96428571,\n", " 0.96428571]), 'std_fit_time': array([4.18887941e-05, 1.97113921e-05, 6.32904785e-06, 1.24610777e-05,\n", " 4.97184205e-05, 8.18184646e-05]), 'split3_train_score': array([0.95238095, 0.98809524, 0.98809524, 0.98809524, 0.98809524,\n", " 1. ]), 'split0_train_score': array([0.80952381, 0.96428571, 0.96428571, 0.98809524, 0.98809524,\n", " 0.98809524]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "8\n", "{'std_score_time': array([7.51373230e-06, 1.75252860e-05, 1.23119306e-05, 1.40419233e-06,\n", " 1.33812096e-06, 2.89677960e-05]), 'split2_train_score': array([0.89285714, 0.95238095, 0.95238095, 0.98809524, 0.98809524,\n", " 0.98809524]), 'split0_test_score': array([0.92857143, 0.92857143, 0.92857143, 0.92857143, 0.96428571,\n", " 0.96428571]), 'split1_train_score': array([0.92857143, 0.96428571, 0.96428571, 1. , 0.98809524,\n", " 1. ]), 'split2_test_score': array([0.92857143, 0.96428571, 0.96428571, 1. , 1. ,\n", " 1. ]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.0001846 , 0.00017673, 0.00015813, 0.00014585, 0.00014281,\n", " 0.00016087]), 'mean_train_score': array([0.92559524, 0.9672619 , 0.9702381 , 0.99107143, 0.98809524,\n", " 0.99404762]), 'mean_fit_time': array([0.00045431, 0.00032181, 0.00026774, 0.00026602, 0.00025386,\n", " 0.00027066]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.04374089, 0.03992979, 0.02961272, 0.02961272, 0.01785714,\n", " 0.02525381]), 'std_train_score': array([0.01951619, 0.01297291, 0.01330993, 0.00515491, 0. ,\n", " 0.00595238]), 'mean_test_score': array([0.89285714, 0.94642857, 0.95535714, 0.97321429, 0.98214286,\n", " 0.96428571]), 'split3_test_score': array([0.89285714, 1. , 1. , 1. , 1. ,\n", " 0.92857143]), 'rank_test_score': array([6, 5, 4, 2, 1, 3], dtype=int32), 'split1_test_score': array([0.82142857, 0.89285714, 0.92857143, 0.96428571, 0.96428571,\n", " 0.96428571]), 'std_fit_time': array([1.30075909e-04, 4.45017634e-05, 4.03202403e-06, 3.17363251e-06,\n", " 2.87773793e-06, 1.10786100e-05]), 'split3_train_score': array([0.94047619, 0.96428571, 0.97619048, 0.98809524, 0.98809524,\n", " 1. ]), 'split0_train_score': array([0.94047619, 0.98809524, 0.98809524, 0.98809524, 0.98809524,\n", " 0.98809524]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n", "\n", "9\n", "{'std_score_time': array([1.12099525e-05, 1.45513482e-06, 9.72341588e-06, 1.22007665e-06,\n", " 1.34241517e-05, 1.19803853e-06]), 'split2_train_score': array([0.92857143, 0.96428571, 0.96428571, 0.96428571, 0.96428571,\n", " 0.97619048]), 'split0_test_score': array([0.78571429, 0.89285714, 0.89285714, 0.89285714, 0.89285714,\n", " 0.89285714]), 'split1_train_score': array([0.89285714, 0.96428571, 0.95238095, 0.97619048, 0.97619048,\n", " 0.98809524]), 'split2_test_score': array([0.92857143, 0.96428571, 0.96428571, 0.96428571, 0.96428571,\n", " 0.92857143]), 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object), 'mean_score_time': array([0.00018686, 0.0001651 , 0.00016868, 0.0001542 , 0.00015867,\n", " 0.00015223]), 'mean_train_score': array([0.90178571, 0.96130952, 0.9672619 , 0.97619048, 0.97619048,\n", " 0.98511905]), 'mean_fit_time': array([0.0004074 , 0.00030303, 0.00029409, 0.00029194, 0.00027001,\n", " 0.00028688]), 'params': [{'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 100, 'gamma': 0.01}, {'C': 100, 'gamma': 0.1}], 'std_test_score': array([0.06858166, 0.03891874, 0.03891874, 0.04374089, 0.04374089,\n", " 0.03992979]), 'std_train_score': array([0.02286055, 0.01297291, 0.01297291, 0.00841794, 0.00841794,\n", " 0.00987091]), 'mean_test_score': array([0.88392857, 0.95535714, 0.95535714, 0.96428571, 0.96428571,\n", " 0.94642857]), 'split3_test_score': array([0.96428571, 1. , 1. , 1. , 1. ,\n", " 1. ]), 'rank_test_score': array([6, 3, 3, 1, 1, 5], dtype=int32), 'split1_test_score': array([0.85714286, 0.96428571, 0.96428571, 1. , 1. ,\n", " 0.96428571]), 'std_fit_time': array([4.94338875e-05, 4.75942249e-06, 1.19803853e-06, 5.52107629e-06,\n", " 6.87187531e-06, 1.12618083e-05]), 'split3_train_score': array([0.91666667, 0.94047619, 0.96428571, 0.97619048, 0.97619048,\n", " 0.97619048]), 'split0_train_score': array([0.86904762, 0.97619048, 0.98809524, 0.98809524, 0.98809524,\n", " 1. ]), 'param_gamma': masked_array(data=[0.01, 0.1, 0.01, 0.1, 0.01, 0.1],\n", " mask=[False, False, False, False, False, False],\n", " fill_value='?',\n", " dtype=object)}\n" ] } ], "source": [ "from sklearn.datasets import load_iris\n", "from matplotlib import pyplot as plt\n", "from sklearn.svm import SVC\n", "from sklearn.model_selection import GridSearchCV, cross_val_score, KFold, train_test_split\n", "import numpy as np\n", "\n", "print(__doc__)\n", "\n", "# Number of random trials\n", "NUM_TRIALS = 10\n", "\n", "# Load the dataset\n", "iris = load_iris()\n", "X_iris = iris.data\n", "y_iris = iris.target\n", "\n", "# Set up possible values of parameters to optimize over\n", "p_grid = {\"C\": [1, 10, 100],\n", " \"gamma\": [.01, .1]}\n", "\n", "# We will use a Support Vector Classifier with \"rbf\" kernel\n", "svm = SVC(kernel=\"rbf\")\n", "\n", "# Arrays to store scores\n", "val_scores = np.zeros((NUM_TRIALS,len(p_grid['C'])))\n", "test_scores = np.zeros((NUM_TRIALS,len(p_grid['C'])))\n", "\n", "best_params = []\n", "# Loop for each trial\n", "for i in range(NUM_TRIALS): #Test set level\n", " print()\n", " print(i)\n", " X_app,X_test,y_app,y_test = train_test_split(X_iris,y_iris)\n", " inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n", " # Non_nested parameter search and scoring\n", " clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)\n", " clf.fit(X_app, y_app)\n", " print(clf.cv_results_)\n", " best_params.append(clf.best_params_)\n", " val_scores[i] = clf.best_score_\n", " test_scores[i] = clf.score(X_test,y_test)\n", "\n", "final_performance = np.mean(test_scores)\n" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "import networkx as nx\n", "import matplotlib.pyplot as plt\n", "import sys\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.graphfiles import loadDataset\n", "from pygraph.utils.utils import kernel_train_test\n", "\n", "from sklearn.model_selection import GridSearchCV, KFold, cross_val_score, cross_validate\n", "from sklearn.kernel_ridge import KernelRidge\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import accuracy_score, mean_squared_error, make_scorer\n", "\n", "from pygraph.kernels.treeletKernel import treeletkernel\n", "from pygraph.kernels.pathKernel import pathkernel\n", "from pygraph.kernels.spKernel import spkernel\n", "\n", "import numpy as np\n", "\n", "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n", "\n", "dataset, y = loadDataset(datafile)\n", "\n", "Kmatrix, run_time = treeletkernel(dataset)\n", "\n", "import matplotlib.pyplot as plt\n", "plt.matshow(Kmatrix)\n", "\n", "# Set the training, test\n", "# Note: the percentage can be set up by the user\n", "datasize = len(y)\n", "num_train = int((datasize * 90) / 100) # 90% (of entire dataset) for training\n", "num_test = datasize - num_train # 10% (of entire dataset) for test\n", "\n", "# Split the kernel matrix\n", "Kmatrix_train = Kmatrix[0:num_train, 0:num_train]\n", "Kmatrix_test = Kmatrix[num_train:datasize, 0:num_train]\n", "\n", "# Split the targets\n", "y_train = y[0:num_train]\n", "y_test = y[num_train:datasize]\n", "\n", "# Set the parameters by cross-validation\n", "tuned_parameters = [{'alpha': np.logspace(-10, 10, num = 100, base = 10)}]\n", "\n", "print(\"# Tuning hyper-parameters\")\n", "print()\n", "\n", "def loss_func_rmse(x_true, x_pred):\n", " return np.sqrt(mean_squared_error(x_true, x_pred))\n", "\n", "loss_rmse = make_scorer(loss_func_rmse, greater_is_better=False)\n", "inner_cv = KFold(n_splits=10, shuffle=True, random_state=10)\n", "outer_cv = KFold(n_splits=10, shuffle=True, random_state=10)\n", "\n", "kr = KernelRidge(kernel='precomputed')\n", "cv = GridSearchCV(estimator=kr, param_grid=tuned_parameters, cv=inner_cv, \n", " scoring=loss_rmse, return_train_score=True)\n", "cv.fit(Kmatrix, y)\n", "cv.get_params()\n", "\n", "print(\"Best parameters set found on development set:\")\n", "print()\n", "print(cv.best_params_)\n", "print()\n", "print(\"Best score set found on development set:\")\n", "print()\n", "print(-cv.best_score_)\n", "print()\n", "print(\"Grid scores on development set:\")\n", "print()\n", "means_train = cv.cv_results_['mean_train_score']\n", "stds_train = cv.cv_results_['std_train_score']\n", "means_test = cv.cv_results_['mean_test_score']\n", "stds_test = cv.cv_results_['std_test_score']\n", "for means_train, stds_train, means_test, stds_test, params \\\n", " in zip(means_train, stds_train, means_test, stds_test, cv.cv_results_['params']):\n", " print(\"train: %0.3f (+/-%0.03f) | test: %0.3f (+/-%0.03f) for %r\"\n", " % (-means_train, stds_train, -means_test, stds_test, params))\n", "print()\n", "\n", "print(Kmatrix.shape)\n", "# Nested CV with parameter optimization\n", "nested_score = cross_val_score(estimator=cv, X=Kmatrix, y=y, cv=outer_cv, \n", " scoring=loss_rmse)\n", "print(nested_score)\n", "# nested_scores[i] = nested_score.mean()\n", "\n", "# print(\"Detailed classification report:\")\n", "# print()\n", "# print(\"The model is trained on the full development set.\")\n", "# print(\"The scores are computed on the full evaluation set.\")\n", "# print()\n", "# # predict on the train and test set\n", "# y_pred_train = cv.predict(Kmatrix_train)\n", "# y_pred_test = cv.predict(Kmatrix_test)\n", "\n", "\n", "# print('y_train: ', y_train)\n", "# print('y_pred_train: ', y_pred_train)\n", "# print('y_test: ', y_test)\n", "# print('y_pred_test: ', y_pred_test)\n", "\n", "# # root mean squared error on train set\n", "# accuracy_train = np.sqrt(mean_squared_error(y_train, y_pred_train))\n", "# accuracy_test = np.sqrt(mean_squared_error(y_test, y_pred_test))\n", "# print('\\n Mean performance on train set: %3f' % accuracy_train)\n", "# print('\\n Mean performance on test set: %3f' % accuracy_test)\n", "\n", "# print()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ljia/.local/lib/python3.5/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", " \"This module will be removed in 0.20.\", DeprecationWarning)\n", "/home/ljia/.local/lib/python3.5/site-packages/sklearn/grid_search.py:42: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. This module will be removed in 0.20.\n", " DeprecationWarning)\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.base import BaseEstimator\n", "from sklearn.grid_search import GridSearchCV\n", "from sklearn.datasets.samples_generator import make_classification\n", "from sklearn.svm import LinearSVC, SVC\n", "from sklearn.metrics import f1_score, precision_score\n", "\n", "X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)\n", "# compute the training kernel matrix corresponding to the linear kernel\n", "K_train = np.dot(X_[:180], X_[:180].T)\n", "y_train = y_[:180]\n", "clf = SVC(kernel='precomputed')\n", "cv = GridSearchCV(clf, {'C': [0.1, 1.0]})\n", "cv.fit(K_train, y_train)\n", "# compute the test kernel matrix\n", "K_test = np.dot(X_[180:], X_[:180].T)\n", "y_test = y_[180:]\n", "y_pred = cv.predict(K_test)\n", "np.mean(y_pred == y_test) >= 0" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2 3] [0 1]\n", "[0 1] [2 3]\n" ] } ], "source": [ "import numpy as np\n", "from sklearn.model_selection import KFold\n", "\n", "X = [\"a\", \"b\", \"c\", \"d\"]\n", "kf = KFold(n_splits=2)\n", "for train, test in kf.split(X):\n", " print(\"%s %s\" % (train, test))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9733333333333334" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn import datasets\n", "from sklearn import svm\n", "\n", "iris = datasets.load_iris()\n", "iris.data.shape, iris.target.shape\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " iris.data, iris.target, test_size=0.4, random_state=0)\n", "\n", "X_train.shape, y_train.shape\n", "\n", "X_test.shape, y_test.shape\n", "\n", "\n", "clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)\n", "clf.score(X_test, y_test)\n", "\n", "\n", "from sklearn.model_selection import cross_val_score, cross_validate\n", "clf = svm.SVC(kernel='linear', C=1)\n", "scores = cross_validate(clf, iris.data, iris.target, cv=5, return_train_score=True)\n", "scores \n", "# print(\"Accuracy: %0.2f (+/- %0.2f)\" % (scores.mean(), scores.std() * 2))\n", "\n", "\n", "from sklearn.model_selection import cross_val_predict\n", "from sklearn.metrics import accuracy_score\n", "predicted = cross_val_predict(clf, iris.data, iris.target, cv=10)\n", "accuracy_score(iris.target, predicted) \n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Automatically created module for IPython interactive environment\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=0, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 1, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.913 (+/-0.085) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.079) for {'C': 1, 'gamma': 0.1}\n", "0.960 (+/-0.079) for {'C': 10, 'gamma': 0.01}\n", "0.960 (+/-0.060) for {'C': 10, 'gamma': 0.1}\n", "0.960 (+/-0.060) for {'C': 100, 'gamma': 0.01}\n", "0.947 (+/-0.038) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=1, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.927 (+/-0.101) for {'C': 1, 'gamma': 0.01}\n", "0.947 (+/-0.064) for {'C': 1, 'gamma': 0.1}\n", "0.940 (+/-0.086) for {'C': 10, 'gamma': 0.01}\n", "0.960 (+/-0.027) for {'C': 10, 'gamma': 0.1}\n", "0.960 (+/-0.027) for {'C': 100, 'gamma': 0.01}\n", "0.933 (+/-0.080) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=2, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.920 (+/-0.039) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.060) for {'C': 1, 'gamma': 0.1}\n", "0.960 (+/-0.060) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.059) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.066) for {'C': 100, 'gamma': 0.01}\n", "0.953 (+/-0.070) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=3, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.940 (+/-0.117) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.060) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.044) for {'C': 10, 'gamma': 0.01}\n", "0.960 (+/-0.046) for {'C': 10, 'gamma': 0.1}\n", "0.960 (+/-0.046) for {'C': 100, 'gamma': 0.01}\n", "0.940 (+/-0.059) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=4, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.913 (+/-0.042) for {'C': 1, 'gamma': 0.01}\n", "0.967 (+/-0.022) for {'C': 1, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.044) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.038) for {'C': 100, 'gamma': 0.01}\n", "0.947 (+/-0.037) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=5, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.947 (+/-0.066) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.059) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.044) for {'C': 10, 'gamma': 0.01}\n", "0.980 (+/-0.023) for {'C': 10, 'gamma': 0.1}\n", "0.980 (+/-0.023) for {'C': 100, 'gamma': 0.01}\n", "0.973 (+/-0.037) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=6, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.920 (+/-0.084) for {'C': 1, 'gamma': 0.01}\n", "0.947 (+/-0.064) for {'C': 1, 'gamma': 0.1}\n", "0.953 (+/-0.044) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.022) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.001) for {'C': 100, 'gamma': 0.01}\n", "0.947 (+/-0.039) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=7, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.947 (+/-0.124) for {'C': 1, 'gamma': 0.01}\n", "0.967 (+/-0.087) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.058) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.058) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 100, 'gamma': 0.01}\n", "0.933 (+/-0.096) for {'C': 100, 'gamma': 0.1}\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=8, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.940 (+/-0.078) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.026) for {'C': 1, 'gamma': 0.1}\n", "0.960 (+/-0.059) for {'C': 10, 'gamma': 0.01}\n", "0.973 (+/-0.065) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 100, 'gamma': 0.01}\n", "0.947 (+/-0.084) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=9, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.927 (+/-0.129) for {'C': 1, 'gamma': 0.01}\n", "0.953 (+/-0.045) for {'C': 1, 'gamma': 0.1}\n", "0.960 (+/-0.028) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.059) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.038) for {'C': 100, 'gamma': 0.01}\n", "0.960 (+/-0.060) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=10, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.920 (+/-0.101) for {'C': 1, 'gamma': 0.01}\n", "0.953 (+/-0.044) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.044) for {'C': 10, 'gamma': 0.01}\n", "0.973 (+/-0.065) for {'C': 10, 'gamma': 0.1}\n", "0.980 (+/-0.044) for {'C': 100, 'gamma': 0.01}\n", "0.973 (+/-0.065) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=11, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.947 (+/-0.083) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.026) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.043) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.022) for {'C': 10, 'gamma': 0.1}\n", "0.967 (+/-0.022) for {'C': 100, 'gamma': 0.01}\n", "0.947 (+/-0.037) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=12, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 1, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.927 (+/-0.069) for {'C': 1, 'gamma': 0.01}\n", "0.973 (+/-0.037) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.022) for {'C': 10, 'gamma': 0.01}\n", "0.960 (+/-0.027) for {'C': 10, 'gamma': 0.1}\n", "0.967 (+/-0.022) for {'C': 100, 'gamma': 0.01}\n", "0.940 (+/-0.024) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=13, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.920 (+/-0.091) for {'C': 1, 'gamma': 0.01}\n", "0.973 (+/-0.037) for {'C': 1, 'gamma': 0.1}\n", "0.980 (+/-0.044) for {'C': 10, 'gamma': 0.01}\n", "0.960 (+/-0.059) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.053) for {'C': 100, 'gamma': 0.01}\n", "0.947 (+/-0.065) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=14, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.920 (+/-0.086) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.027) for {'C': 1, 'gamma': 0.1}\n", "0.960 (+/-0.027) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.022) for {'C': 10, 'gamma': 0.1}\n", "0.980 (+/-0.044) for {'C': 100, 'gamma': 0.01}\n", "0.960 (+/-0.059) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=15, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 1, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.953 (+/-0.044) for {'C': 1, 'gamma': 0.01}\n", "0.973 (+/-0.037) for {'C': 1, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 10, 'gamma': 0.01}\n", "0.973 (+/-0.038) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.038) for {'C': 100, 'gamma': 0.01}\n", "0.953 (+/-0.059) for {'C': 100, 'gamma': 0.1}\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=16, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.900 (+/-0.088) for {'C': 1, 'gamma': 0.01}\n", "0.953 (+/-0.024) for {'C': 1, 'gamma': 0.1}\n", "0.960 (+/-0.027) for {'C': 10, 'gamma': 0.01}\n", "0.973 (+/-0.037) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 100, 'gamma': 0.01}\n", "0.940 (+/-0.057) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=17, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.920 (+/-0.084) for {'C': 1, 'gamma': 0.01}\n", "0.967 (+/-0.059) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.059) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.044) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 100, 'gamma': 0.01}\n", "0.940 (+/-0.043) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=18, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 1, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.927 (+/-0.059) for {'C': 1, 'gamma': 0.01}\n", "0.973 (+/-0.054) for {'C': 1, 'gamma': 0.1}\n", "0.973 (+/-0.054) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.045) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.038) for {'C': 100, 'gamma': 0.01}\n", "0.953 (+/-0.070) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=19, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.907 (+/-0.057) for {'C': 1, 'gamma': 0.01}\n", "0.967 (+/-0.057) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.043) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.022) for {'C': 10, 'gamma': 0.1}\n", "0.980 (+/-0.023) for {'C': 100, 'gamma': 0.01}\n", "0.960 (+/-0.027) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=20, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.893 (+/-0.107) for {'C': 1, 'gamma': 0.01}\n", "0.953 (+/-0.059) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.059) for {'C': 10, 'gamma': 0.01}\n", "0.973 (+/-0.038) for {'C': 10, 'gamma': 0.1}\n", "0.980 (+/-0.023) for {'C': 100, 'gamma': 0.01}\n", "0.953 (+/-0.080) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=21, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.947 (+/-0.038) for {'C': 1, 'gamma': 0.01}\n", "0.973 (+/-0.065) for {'C': 1, 'gamma': 0.1}\n", "0.980 (+/-0.044) for {'C': 10, 'gamma': 0.01}\n", "0.980 (+/-0.044) for {'C': 10, 'gamma': 0.1}\n", "0.987 (+/-0.046) for {'C': 100, 'gamma': 0.01}\n", "0.973 (+/-0.065) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=22, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.913 (+/-0.078) for {'C': 1, 'gamma': 0.01}\n", "0.940 (+/-0.044) for {'C': 1, 'gamma': 0.1}\n", "0.953 (+/-0.043) for {'C': 10, 'gamma': 0.01}\n", "0.960 (+/-0.059) for {'C': 10, 'gamma': 0.1}\n", "0.953 (+/-0.043) for {'C': 100, 'gamma': 0.01}\n", "0.960 (+/-0.045) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=23, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.940 (+/-0.057) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.027) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.044) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.022) for {'C': 10, 'gamma': 0.1}\n", "0.967 (+/-0.022) for {'C': 100, 'gamma': 0.01}\n", "0.947 (+/-0.091) for {'C': 100, 'gamma': 0.1}\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=24, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.907 (+/-0.048) for {'C': 1, 'gamma': 0.01}\n", "0.953 (+/-0.070) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.059) for {'C': 10, 'gamma': 0.01}\n", "0.973 (+/-0.037) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 100, 'gamma': 0.01}\n", "0.947 (+/-0.066) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=25, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.913 (+/-0.069) for {'C': 1, 'gamma': 0.01}\n", "0.953 (+/-0.043) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.022) for {'C': 10, 'gamma': 0.01}\n", "0.967 (+/-0.022) for {'C': 10, 'gamma': 0.1}\n", "0.980 (+/-0.044) for {'C': 100, 'gamma': 0.01}\n", "0.953 (+/-0.068) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=26, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 1, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.907 (+/-0.091) for {'C': 1, 'gamma': 0.01}\n", "0.967 (+/-0.059) for {'C': 1, 'gamma': 0.1}\n", "0.967 (+/-0.059) for {'C': 10, 'gamma': 0.01}\n", "0.960 (+/-0.060) for {'C': 10, 'gamma': 0.1}\n", "0.967 (+/-0.059) for {'C': 100, 'gamma': 0.01}\n", "0.960 (+/-0.060) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=27, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 1, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.947 (+/-0.001) for {'C': 1, 'gamma': 0.01}\n", "0.980 (+/-0.044) for {'C': 1, 'gamma': 0.1}\n", "0.980 (+/-0.044) for {'C': 10, 'gamma': 0.01}\n", "0.980 (+/-0.023) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 100, 'gamma': 0.01}\n", "0.967 (+/-0.044) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=28, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 100, 'gamma': 0.01}\n", "\n", "Grid scores on development set:\n", "\n", "0.933 (+/-0.060) for {'C': 1, 'gamma': 0.01}\n", "0.960 (+/-0.060) for {'C': 1, 'gamma': 0.1}\n", "0.960 (+/-0.060) for {'C': 10, 'gamma': 0.01}\n", "0.960 (+/-0.060) for {'C': 10, 'gamma': 0.1}\n", "0.973 (+/-0.037) for {'C': 100, 'gamma': 0.01}\n", "0.960 (+/-0.089) for {'C': 100, 'gamma': 0.1}\n", "\n", "clf.params: {'error_score': 'raise', 'estimator__probability': False, 'estimator__gamma': 'auto', 'estimator__class_weight': None, 'pre_dispatch': '2*n_jobs', 'estimator__random_state': None, 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False), 'estimator__tol': 0.001, 'estimator__shrinking': True, 'n_jobs': 1, 'param_grid': {'C': [1, 10, 100], 'gamma': [0.01, 0.1]}, 'estimator__max_iter': -1, 'estimator__verbose': False, 'iid': True, 'estimator__degree': 3, 'estimator__C': 1.0, 'fit_params': None, 'estimator__kernel': 'rbf', 'cv': KFold(n_splits=4, random_state=29, shuffle=True), 'scoring': None, 'verbose': 0, 'refit': True, 'return_train_score': 'warn', 'estimator__decision_function_shape': 'ovr', 'estimator__coef0': 0.0, 'estimator__cache_size': 200}\n", "\n", "Best parameters set found on development set:\n", "\n", "{'C': 10, 'gamma': 0.1}\n", "\n", "Grid scores on development set:\n", "\n", "0.940 (+/-0.022) for {'C': 1, 'gamma': 0.01}\n", "0.967 (+/-0.022) for {'C': 1, 'gamma': 0.1}\n", "0.973 (+/-0.001) for {'C': 10, 'gamma': 0.01}\n", "0.980 (+/-0.044) for {'C': 10, 'gamma': 0.1}\n", "0.980 (+/-0.044) for {'C': 100, 'gamma': 0.01}\n", "0.953 (+/-0.043) for {'C': 100, 'gamma': 0.1}\n", "\n", "Average difference of 0.007742 with std. dev. of 0.007688.\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn.datasets import load_iris\n", "from matplotlib import pyplot as plt\n", "from sklearn.svm import SVC\n", "from sklearn.model_selection import GridSearchCV, cross_val_score, KFold\n", "import numpy as np\n", "\n", "print(__doc__)\n", "\n", "# Number of random trials\n", "NUM_TRIALS = 30\n", "\n", "# Load the dataset\n", "iris = load_iris()\n", "X_iris = iris.data\n", "y_iris = iris.target\n", "\n", "# Set up possible values of parameters to optimize over\n", "p_grid = {\"C\": [1, 10, 100],\n", " \"gamma\": [.01, .1]}\n", "\n", "# We will use a Support Vector Classifier with \"rbf\" kernel\n", "svm = SVC(kernel=\"rbf\")\n", "\n", "# Arrays to store scores\n", "non_nested_scores = np.zeros(NUM_TRIALS)\n", "nested_scores = np.zeros(NUM_TRIALS)\n", "\n", "# Loop for each trial\n", "for i in range(NUM_TRIALS):\n", "\n", " # Choose cross-validation techniques for the inner and outer loops,\n", " # independently of the dataset.\n", " # E.g \"LabelKFold\", \"LeaveOneOut\", \"LeaveOneLabelOut\", etc.\n", " inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n", " outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n", "\n", " # Non_nested parameter search and scoring\n", " clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)\n", " clf.fit(X_iris, y_iris)\n", " print('clf.params: ', clf.get_params())\n", " print()\n", " non_nested_scores[i] = clf.best_score_\n", " \n", " print(\"Best parameters set found on development set:\")\n", " print()\n", " print(clf.best_params_)\n", " print()\n", " print(\"Grid scores on development set:\")\n", " print()\n", " means = clf.cv_results_['mean_test_score']\n", " stds = clf.cv_results_['std_test_score']\n", " for mean, std, params in zip(means, stds, clf.cv_results_['params']):\n", " print(\"%0.3f (+/-%0.03f) for %r\"\n", " % (mean, std * 2, params))\n", " print()\n", "\n", " # Nested CV with parameter optimization\n", " nested_score = cross_val_score(clf, X=X_iris, y=y_iris, cv=outer_cv)\n", " nested_scores[i] = nested_score.mean()\n", "\n", "score_difference = non_nested_scores - nested_scores\n", "\n", "print(\"Average difference of {0:6f} with std. dev. of {1:6f}.\"\n", " .format(score_difference.mean(), score_difference.std()))\n", "\n", "# Plot scores on each trial for nested and non-nested CV\n", "plt.figure()\n", "plt.subplot(211)\n", "non_nested_scores_line, = plt.plot(non_nested_scores, color='r')\n", "nested_line, = plt.plot(nested_scores, color='b')\n", "plt.ylabel(\"score\", fontsize=\"14\")\n", "plt.legend([non_nested_scores_line, nested_line],\n", " [\"Non-Nested CV\", \"Nested CV\"],\n", " bbox_to_anchor=(0, .4, .5, 0))\n", "plt.title(\"Non-Nested and Nested Cross Validation on Iris Dataset\",\n", " x=.5, y=1.1, fontsize=\"15\")\n", "\n", "# Plot bar chart of the difference.\n", "plt.subplot(212)\n", "difference_plot = plt.bar(range(NUM_TRIALS), score_difference)\n", "plt.xlabel(\"Individual Trial #\")\n", "plt.legend([difference_plot],\n", " [\"Non-Nested CV - Nested CV Score\"],\n", " bbox_to_anchor=(0, 1, .8, 0))\n", "plt.ylabel(\"score difference\", fontsize=\"14\")\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }