|
|
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "scrolled": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Automatically created module for IPython interactive environment\n",
- "# Tuning hyper-parameters for precision\n",
- "\n",
- "Best parameters set found on development set:\n",
- "\n",
- "{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "\n",
- "Grid scores on development set:\n",
- "\n",
- "0.986 (+/-0.016) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "0.959 (+/-0.029) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}\n",
- "0.988 (+/-0.017) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "0.982 (+/-0.026) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}\n",
- "0.988 (+/-0.017) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "0.982 (+/-0.025) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}\n",
- "0.988 (+/-0.017) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "0.982 (+/-0.025) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}\n",
- "0.975 (+/-0.014) for {'C': 1, 'kernel': 'linear'}\n",
- "0.975 (+/-0.014) for {'C': 10, 'kernel': 'linear'}\n",
- "0.975 (+/-0.014) for {'C': 100, 'kernel': 'linear'}\n",
- "0.975 (+/-0.014) for {'C': 1000, 'kernel': 'linear'}\n",
- "\n",
- "Detailed classification report:\n",
- "\n",
- "The model is trained on the full development set.\n",
- "The scores are computed on the full evaluation set.\n",
- "\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 1.00 1.00 1.00 89\n",
- " 1 0.97 1.00 0.98 90\n",
- " 2 0.99 0.98 0.98 92\n",
- " 3 1.00 0.99 0.99 93\n",
- " 4 1.00 1.00 1.00 76\n",
- " 5 0.99 0.98 0.99 108\n",
- " 6 0.99 1.00 0.99 89\n",
- " 7 0.99 1.00 0.99 78\n",
- " 8 1.00 0.98 0.99 92\n",
- " 9 0.99 0.99 0.99 92\n",
- "\n",
- " micro avg 0.99 0.99 0.99 899\n",
- " macro avg 0.99 0.99 0.99 899\n",
- "weighted avg 0.99 0.99 0.99 899\n",
- "\n",
- "\n",
- "# Tuning hyper-parameters for recall\n",
- "\n",
- "Best parameters set found on development set:\n",
- "\n",
- "{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "\n",
- "Grid scores on development set:\n",
- "\n",
- "0.986 (+/-0.019) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "0.957 (+/-0.029) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}\n",
- "0.987 (+/-0.019) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "0.981 (+/-0.028) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}\n",
- "0.987 (+/-0.019) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "0.981 (+/-0.026) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}\n",
- "0.987 (+/-0.019) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}\n",
- "0.981 (+/-0.026) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}\n",
- "0.972 (+/-0.012) for {'C': 1, 'kernel': 'linear'}\n",
- "0.972 (+/-0.012) for {'C': 10, 'kernel': 'linear'}\n",
- "0.972 (+/-0.012) for {'C': 100, 'kernel': 'linear'}\n",
- "0.972 (+/-0.012) for {'C': 1000, 'kernel': 'linear'}\n",
- "\n",
- "Detailed classification report:\n",
- "\n",
- "The model is trained on the full development set.\n",
- "The scores are computed on the full evaluation set.\n",
- "\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 1.00 1.00 1.00 89\n",
- " 1 0.97 1.00 0.98 90\n",
- " 2 0.99 0.98 0.98 92\n",
- " 3 1.00 0.99 0.99 93\n",
- " 4 1.00 1.00 1.00 76\n",
- " 5 0.99 0.98 0.99 108\n",
- " 6 0.99 1.00 0.99 89\n",
- " 7 0.99 1.00 0.99 78\n",
- " 8 1.00 0.98 0.99 92\n",
- " 9 0.99 0.99 0.99 92\n",
- "\n",
- " micro avg 0.99 0.99 0.99 899\n",
- " macro avg 0.99 0.99 0.99 899\n",
- "weighted avg 0.99 0.99 0.99 899\n",
- "\n",
- "\n"
- ]
- }
- ],
- "source": [
- "# Parameter estimation using grid search with cross-validation\n",
- "from __future__ import print_function\n",
- "\n",
- "from sklearn import datasets\n",
- "from sklearn.metrics import classification_report\n",
- "from sklearn.model_selection import GridSearchCV, train_test_split\n",
- "from sklearn.svm import SVC\n",
- "\n",
- "# Load the digits dataset that ships with scikit-learn.\n",
- "digits = datasets.load_digits()\n",
- "\n",
- "# To apply a classifier to this data we need a (samples, features) matrix,\n",
- "# so every image is flattened into a single row.\n",
- "n_samples = len(digits.images)\n",
- "X = digits.images.reshape(n_samples, -1)\n",
- "y = digits.target\n",
- "\n",
- "# Split the dataset into two equal parts: one for tuning, one for evaluation.\n",
- "X_train, X_test, y_train, y_test = train_test_split(\n",
- "    X, y, test_size=0.5, random_state=0\n",
- ")\n",
- "\n",
- "# Candidate hyper-parameters, one grid per kernel (the linear grid has no\n",
- "# gamma entry).\n",
- "tuned_parameters = [\n",
- "    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},\n",
- "    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},\n",
- "]\n",
- "\n",
- "# Metrics to tune for; each one gets its own grid search in the loop below.\n",
- "scores = ['precision', 'recall']\n",
- "\n",
- "for score in scores:\n",
- "    print(\"# Tuning hyper-parameters for %s\" % score)\n",
- "    print()\n",
- "\n",
- "    # Search both grids with 5-fold cross-validation, ranking candidates by\n",
- "    # the macro-averaged variant of the current metric.\n",
- "    clf = GridSearchCV(\n",
- "        SVC(), tuned_parameters, cv=5, scoring='%s_macro' % score\n",
- "    )\n",
- "    clf.fit(X_train, y_train)\n",
- "\n",
- "    print(\"Best parameters set found on development set:\")\n",
- "    print()\n",
- "    print(clf.best_params_)\n",
- "    print()\n",
- "    print(\"Grid scores on development set:\")\n",
- "    print()\n",
- "    # One line per candidate: mean CV score and twice its standard deviation.\n",
- "    cv_results = clf.cv_results_\n",
- "    for mean, std, params in zip(cv_results['mean_test_score'],\n",
- "                                 cv_results['std_test_score'],\n",
- "                                 cv_results['params']):\n",
- "        print(\"%0.3f (+/-%0.03f) for %r\" % (mean, std * 2, params))\n",
- "    print()\n",
- "\n",
- "    print(\"Detailed classification report:\")\n",
- "    print()\n",
- "    print(\"The model is trained on the full development set.\")\n",
- "    print(\"The scores are computed on the full evaluation set.\")\n",
- "    print()\n",
- "    # Score the fitted search object on the held-out half.\n",
- "    y_true = y_test\n",
- "    y_pred = clf.predict(X_test)\n",
- "    print(classification_report(y_true, y_pred))\n",
- "    print()\n",
- "\n",
- "# Note the problem is too easy: the hyperparameter plateau is too flat and the\n",
- "# output model is the same for precision and recall with ties in quality."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_split.py:1943: FutureWarning: You should specify a value for 'cv' instead of relying on the default value. The default value will change from 3 to 5 in version 0.22.\n",
- " warnings.warn(CV_WARNING, FutureWarning)\n",
- "/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
- " \"avoid this warning.\", FutureWarning)\n",
- "/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
- " \"avoid this warning.\", FutureWarning)\n",
- "/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
- " \"avoid this warning.\", FutureWarning)\n",
- "/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
- " \"avoid this warning.\", FutureWarning)\n",
- "/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
- " \"avoid this warning.\", FutureWarning)\n",
- "/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
- " \"avoid this warning.\", FutureWarning)\n",
- "/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py:841: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.\n",
- " DeprecationWarning)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "GridSearchCV(cv='warn', error_score='raise-deprecating',\n",
- " estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
- " decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n",
- " kernel='rbf', max_iter=-1, probability=False, random_state=None,\n",
- " shrinking=True, tol=0.001, verbose=False),\n",
- " fit_params=None, iid='warn', n_jobs=None,\n",
- " param_grid={'kernel': ('linear', 'rbf'), 'C': [1, 10]},\n",
- " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
- " scoring=None, verbose=0)"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "from sklearn import svm, datasets\n",
- "from sklearn.model_selection import GridSearchCV\n",
- "\n",
- "iris = datasets.load_iris()\n",
- "parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}\n",
- "\n",
- "# Spell out the settings that were implicit defaults: gamma='auto' and cv=3\n",
- "# are the values the FutureWarnings said would change in scikit-learn 0.22,\n",
- "# and return_train_score=True keeps the *_train_score entries in cv_results_.\n",
- "svc = svm.SVC(gamma='auto')\n",
- "clf = GridSearchCV(svc, parameters, cv=3, return_train_score=True)\n",
- "clf.fit(iris.data, iris.target)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['mean_fit_time',\n",
- " 'mean_score_time',\n",
- " 'mean_test_score',\n",
- " 'mean_train_score',\n",
- " 'param_C',\n",
- " 'param_kernel',\n",
- " 'params',\n",
- " 'rank_test_score',\n",
- " 'split0_test_score',\n",
- " 'split0_train_score',\n",
- " 'split1_test_score',\n",
- " 'split1_train_score',\n",
- " 'split2_test_score',\n",
- " 'split2_train_score',\n",
- " 'std_fit_time',\n",
- " 'std_score_time',\n",
- " 'std_test_score',\n",
- " 'std_train_score']"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Alphabetical list of the keys stored in the grid-search results dict.\n",
- "sorted(clf.cv_results_)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "dict_values([array([0.00032242, 0.00050696, 0.00030732, 0.00045021]), array([5.32061151e-05, 8.45866051e-05, 1.21523332e-05, 2.86382520e-05]), array([0.0001688 , 0.00019749, 0.00016387, 0.0001924 ]), array([8.85899620e-06, 4.74580085e-06, 9.39730861e-06, 5.54376603e-06]), masked_array(data=[1, 1, 10, 10],\n",
- " mask=[False, False, False, False],\n",
- " fill_value='?',\n",
- " dtype=object), masked_array(data=['linear', 'rbf', 'linear', 'rbf'],\n",
- " mask=[False, False, False, False],\n",
- " fill_value='?',\n",
- " dtype=object), [{'C': 1, 'kernel': 'linear'}, {'C': 1, 'kernel': 'rbf'}, {'C': 10, 'kernel': 'linear'}, {'C': 10, 'kernel': 'rbf'}], array([1. , 0.98039216, 1. , 0.98039216]), array([0.96078431, 0.96078431, 0.92156863, 0.96078431]), array([0.97916667, 0.97916667, 1. , 1. ]), array([0.98 , 0.97333333, 0.97333333, 0.98 ]), array([0.01617914, 0.00902067, 0.03715363, 0.01592466]), array([1, 3, 3, 1], dtype=int32), array([0.97979798, 0.96969697, 0.95959596, 0.95959596]), array([1., 1., 1., 1.]), array([0.99019608, 0.98039216, 0.98039216, 0.98039216]), array([0.98999802, 0.98336304, 0.97999604, 0.97999604]), array([0.00824863, 0.01254825, 0.01649726, 0.01649726])])"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Show the results together with their keys: a bare .values() view drops the\n",
- "# labels, and its insertion order does not match the sorted key list.\n",
- "clf.cv_results_"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Automatically created module for IPython interactive environment\n"
- ]
- },
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from sklearn import svm, datasets\n",
- "\n",
- "# Import some data to play with.\n",
- "iris = datasets.load_iris()\n",
- "# Only the first two features are kept. The slicing could be avoided by\n",
- "# starting from a two-dimensional dataset.\n",
- "X = iris.data[:, :2]\n",
- "Y = iris.target\n",
- "\n",
- "\n",
- "def my_kernel(X, Y):\n",
- "    \"\"\"Custom linear kernel that weights the first feature twice as much.\n",
- "\n",
- "    Computes::\n",
- "\n",
- "                     (2  0)\n",
- "        k(X, Y) = X  (    ) Y.T\n",
- "                     (0  1)\n",
- "\n",
- "    X and Y are expected to be 2-D arrays with two columns each; the result\n",
- "    has one row per row of X and one column per row of Y.\n",
- "    \"\"\"\n",
- "    feature_weights = np.array([[2, 0], [0, 1.0]])\n",
- "    weighted_X = np.dot(X, feature_weights)\n",
- "    return np.dot(weighted_X, Y.T)\n",
- "\n",
- "\n",
- "h = .02  # step size in the mesh\n",
- "\n",
- "# Create an SVM instance that uses the custom kernel and fit our data.\n",
- "clf = svm.SVC(kernel=my_kernel)\n",
- "clf.fit(X, Y)\n",
- "\n",
- "# Plot the decision boundary. For that, we will assign a color to each\n",
- "# point in the mesh [x_min, x_max]x[y_min, y_max].\n",
- "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
- "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
- "xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n",
- "Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
- "\n",
- "# Put the result into a color plot.\n",
- "Z = Z.reshape(xx.shape)\n",
- "plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)\n",
- "\n",
- "# Also plot the training points.\n",
- "plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolors='k')\n",
- "plt.title('3-Class classification using Support Vector Machine with custom'\n",
- "          ' kernel')\n",
- "# Label the axes with the names of the two plotted features so the figure\n",
- "# can be read on its own.\n",
- "plt.xlabel(iris.feature_names[0])\n",
- "plt.ylabel(iris.feature_names[1])\n",
- "plt.axis('tight')\n",
- "plt.show()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|