{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Chaining a PCA and a logistic regression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The PCA does an unsupervised dimensionality reduction, while the logistic regression does the prediction.\n", "\n", "We use a GridSearchCV to set the dimensionality of the PCA" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQQAAADICAYAAAAOVZ9xAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJztnXd4XNW1t98ladS7VWx1XLCxKQbcYiB2gEsgCcVcMM0EEsAmCcSQm4SWj3ppueGGkBAImGIgGAiB2Bjia2MMgbiAGzbuVZYl2you6tKMtL4/ztEgGVnSSBqdmdF+n2eeOWWfc357ZrS09t5rry2qisFgMACEOS3AYDAEDsYgGAwGL8YgGAwGL8YgGAwGL8YgGAwGL8YgGAwGL8YgGAwGL8YgGAwGL8YgGAwGLxFOC+gpaWlpWlBQ4LQMgyGgWbVqVbmqpndWLugNQkFBAStXrnRahsEQ0IhIYVfKmSaDIeh48bNdvPjZLqdlhCTGIBiCjqU7ylm6o9xpGSFJ0DcZDP2PWdeNdVpCyNIvPITaRg9mmrfB0Dkh7yFMf2UlH20uZcFtZzE0I6HDspWVlZSWluJ2u/tInaE7VNVb309CtMthJYGDy+UiIyODxMTEHt0n5A1CZEQYnmZl+c6DHRqEyspKDhw4QHZ2NjExMYhIH6o0+EJhRQ0A+QPiHFYSGKgqdXV1FBcXA/TIKIR8k2H84AEALN9Z0WG50tJSsrOziY2NNcYgwMkfEGeMQStEhNjYWLKzsyktLe3RvULeIHxrcCoAK3Yd7LAfwe12ExMT01eyDIZeJyYmpsfN3ZA3CEPS40mLj6SsqoGd5TUdljWeQXBQWlVPaVW90zICjt74/Ya8QRARxh9nNRtW7DzosBpDb1Df2ER9Y5Mjz548eTKzZs1y5Nl9QcgbBIDx3mZDx/0IhuAgb0AceZ30IXz88cfk5OT0kaLQoV8YhAmtOhZNPILBcGz6hUEYlhFPalwkByobKKyodVpOtygoKOB3v/sdJ598MklJSVxxxRXU13fejp47dy6jR48mMTGRIUOGsGDBAgBKSkq46KKLSE1NZejQoTz//PPea+6//34uv/xypk2bRkJCAieddBJbt27l0UcfJSMjg9zcXBYuXOgtP3nyZO666y7GjRtHYmIiF198MQcPft08mzdvHqNGjSI5OZnJkyezadOmLtdr/vz5jB49muTkZCZOnMi6des4UFnPgcr6Y15bU1PDBRdcQElJCfHx8cTHx1NSUsLnn3/OmDFjSExMJDMzk1/84hc+f24AhYWFnHHGGSQkJHDeeedRXv51GHVHdX388cfJzs4mISGB4cOHs3jxYgCam5t57LHHGDJkCAMGDGDq1Knez2/37t2ICLNnzyYvL4+0tDQefvjhTr/3bqOqQf06/fTTtSvMeGWl5t8xX+esKGz3/MaNG7t0H6fIz8/XsWPHanFxsVZUVOiIESP0mWee6fCaFStWaGJioi5cuFCbmpp07969umnTJlVVPeuss/QnP/mJ1tXV6Zo1azQtLU0XL16sqqr33XefRkVF6YIFC9Ttduu1116rBQUF+t///d/a2Niozz33nBYUFHifM2nSJM3KytL169drdXW1XnrppXrNNdeoquqWLVs0NjZWFy5cqI2Njfr444/rkCFDtKGhodN6rV69WtPT03X58uXq8Xj05Zdf1vz8fN1aclALK2o6vHbJkiWanZ3d5vOYMGGCvvLKK6qqWlVVpcuWLfP5c5s0aZIOHjxYt2zZorW1tTpp0iS94447Oq3r5s2bNScnR4uLi1VVddeuXbp9+3ZVVX3yySd1/PjxWlRUpPX19Tp9+nS98sorveUAvfHGG7W2tlbXrl2rkZGRx/y9Hus4sFK78Pfk+B90T19dNQgvfbZT8++Yr7e9sabd8+19kFOfXapvfbFHVVUbPU069dml+s7qIlVVrW3w6NRnl+q8tdYXfKSuUac+u1T/ub5EVVUrqht06rNLddGG/aqqeqCyTqc+u1SXbD6gqqrFh2q7pLuF/Px8ffXVV737v/rVr3TGjBkdXjN9+nS97bbbvnF8z549GhYWppWVld5jd955p1533XWqahmEc88913tu3rx5GhcXpx6PR1VVKysrFdBDhw6pqrb5o1BV3bBhg7pcLvV4PPrggw/q5Zdf7j3X1NSkWVlZumTJkk7rdfPNN+tvfvObNtqPP/54/fjjjzu9tj2DcNZZZ+m9996rZWVlHX1sx/zcWur60EMPefeffvpp/e53v6uq2mFdt23bpunp6bpo0SJtbGxsc88RI0bohx9+6N0vKSnRiIgIdbvdXoNQVFTkPT927FidM2dOu/p6ahD6RZMB2gYoaZD2IwwcONC7HRsbS3V1dYfli4qKGDJkyDeOl5SUkJqaSkLC15Gb+fn53kg3gMzMTO92TEwMaWlphIeHe/eBNs/Pzc1tcy+32015eTklJSXk5+d7z4WFhZGbm9vmWceqV2FhIU888QTJycneV1FRESUlJd36TF544QW2bt3KiBEjGDt2LPPnz2+33LE+t86e2VFdhw4dypNPPsn9999PRkYGV155pbcehYWFTJkyxVvHE044gfDwcA4cONCtevaEkA9dbmF4ZgLJsS72Haln76E6clNjO73mzRnf8m67wsPa7MdEhrfZT4x2tdlPjYtss5+REN1mPyvZ/0FQubm57Nix4xvHs7KyOHjwIFVVVV6jsGfPHrKzs7v9rKKiIu/2nj17cLlcpKWlkZWVxfr1673nVJWioqIuPSs3N5d77rmHe+65p83x/Ufq2X+k4/6T9sbkhw0bxpw5c2hubuadd97hsssuo6Kigri4tiMWx/rcOqOzul599dVcffXVVFZWMmPGDO644w5effVVcnNzefHFFznjjDO+cc/du3f7rKMn9BsPISxMODknGYCN+yodVtM33HDDDbz00kssXryY5uZmiouL2bx5M7m5uUycOJG77rqL+vp61q1bxwsvvMC0adO6/azXXnuNjRs3Ultby7333stll11GeHg4U6dO5f3332fx4sW43W6eeOIJoqKimDhxYqf3vOmmm3j22WdZsWIFqkpNTQ3vv/8+hw4fwd3U3OG1mZmZVFRUcOTIkTYay8rKCAsLIznZ+i2EhX3zT+BYn1tndFTXLVu28NFHH9HQ0EB0dDQxMTHeZ998883cc889FBZaSY3KysqYO3dup8/zB/3GIAAMz4wHYMv+KoeV9A3jxo3jpZde4vbbbycpKYlJkyZ5f3Rz5sxh9+7dZGVlMWXKFB544AHOPffcbj/r2muv5frrr2fgwIHU19fz1FNPATB8+HBee+01br31VtLS0njvvfd47733iIyM7PSeY8aM4fnnn+eWW24hJSWFoUOH8vLLL5OTGtuphzdixAiuuuoqBg8eTHJyMiUlJSxYsIBRo0YRHx/PzJkzeeONN9oNV+/oc+uIjura0NDAnXfeSVpaGgMHDqS0tJRHH30UgJkzZ3LRRRdx3nnnkZCQwIQJE1ixYkWnz/MHEqzt6RbGjBmjXc2p+LeVRfzq7XV8/+RBPH31aW3Obdq0iRNOOMEfEkOeyZMnM23aNG688UanpfR7jvU7FpFVqjqms+v7lYcwYqA1LXRrP/EQQpV9R+rYd6TOaRkhSb8yCEMz4hGBXeU1NHiciYXvbR555BFv8E3r1wUXXOC0NL/R1Kw0NQe3Zxuo9JtRBrBGBvJTY9ldUcuu8hqvxxDM3H333dx9992Oavj444/79Hk5KZ2PEBm6R7/yEACGD7SG2fpLx6LB4Au9ZhBEZFQ7x14UkVIR+arVsftFpFhE1tqv77U6d5eIbBeRLSLy3d7S1prhmcc2CMHewdpf2He4jn2HTR/C0fTG77c3PYRX2zn2MnB+O8d/r6qj7dcHACIyErgSGGVf82cRCe9FfQAcb3sIWw+0NQgul4u6OvMjCwaa1XoZ2lJXV4fL1bPEs71pEL4RGqaq/wK6mpXkYuANVW1Q1V3AdmBcL+oDWnkIRxmEjIwMiouLqa2tNZ5CgJOdEkN2ikl314KqUltbS3FxMRkZGT26V292KvryV3SLiPwQWAn8l6oeArKB5a3K7LWPfQMRmQ5MB8jLy/NJZEFaHJHhYRQdrKO6wUN8lPURtGSqLSkpMWnYDUGHy+UiMzMzKNOwPwM8hGVAHgKeAH7syw1U9TngObACk3y51hUexuD0ODbvr2LbgSpOzUvxnktMTOzxB2rwPw+8twGA+y78RreVoYf0ZpOhsSuFVPWAqjapajPwPF83C4qB3FZFc+xjvc7wY/QjGAz9nS57CGJNH7sGGKyqD4pIHjBQVT8HUNUJXbzPIFXdZ+9OAVpGIOYBr4vI/wJZwDDg867q84XjvSMN/plCavAvxjPwH740Gf4MNANnAw8CVcDfgWOuvCkic4DJQJqI7AXuAyaLyGisJsNuYAaAqm4QkbeAjYAH+Jmq+iWccITxEAyGdvHFIIxX1dNEZA2Aqh4SkQ6nrKnqVe0cfqGD8g8DfkwYZ9HiIWw2wUlByf/7h+VUPnTJiQ4rCT186UNw23EBCiAi6VgeQ9CRnRxDXGQ45dUNHK7tUteHIYCIdoUR7ep3QbZ9gi8ewlPAu0CGiDwMXAb8xi+q/ExYmDAoOYbtpdUcqGwgObbzufmGwOGe7490WkLI0mWDoKp/FZFVwDlYQUiXqOqmTi4LWNLiI9leCmVVDd5RB4Ohv+PLKMMEYIOqPm3vJ4rIeFV1JrVLD0lPiAagrNqsERhs3PXOOgAevfRkh5WEHr40xJ4BWo/TVdvHgpL0+CjA8hAMwUVybKRp5vkJX/oQRFsF+atqs4gEbT6F9ATLIJRXm07FYOOO80c4LSFk8cVD2CkiPxcRl/2aCez0lzB/02IQjIdgMHyNLwbhZmAiVjjxXmA89gSjYMQYhODll3/7kl/+7UunZYQkvowylGLlKwgJ0uKtNqgxCMFHVlK00xJCFl9GGdKBm4CC1tepqk8zFQMFr4dQbQxCsPGL84Y7LSFk8aVTcC7wKfAhEPQpiwfERREmcKi2EXdTM65wE/lmMPhiEGJV9Q6/KeljwsOE1LgoyqsbOFjTSGaicUODhdveWAPAk1ee6rCS0MOXf4vzWydEDQVMx2JwMjg9nsHp8U7LCEl88RBmAneLSAPgxgpfVlUN2hRD6QlRbNpnDEKw8fNzhjktIWTxZZQh5AL+zUiDwdAWnyINRSQFK5ORt8FtZ1YOSsxIQ3Byy+urAfjTUQv2GnqOL8OON2I1G3KAtcAEYBlWBqWgxMxnCE5GZgVtKzXg8bUPYSywXFW/IyIjgEf8I6tvMB5CcPLTyUOdlhCy+DLKUK+q9QAiEqWqm4GgjhAxowwGQ1t88RD2ikgy8A9gkYgcAgr9I6tvyGiZ8WgMQlBx86urAHj22tMdVhJ6+DLKMMXevF9ElgBJwAK/qOoj0kwfQlByWn6y0xJClk4NgogkqmqliKS2Orzefo+n62s3BhxJMS5c4UJVg4d6dxPRrl5fW9bgB6Z/e4jTEkKWrngIrwM/AFZhZVyWo94H+02dnxER0uOjKDlST1lVA7mpsU5LMhgcpVODoKo/sFdtmqSqe/pAU5+SnmAbhGpjEIKFG2d/AcCs6465RpChm3SpD0FVVUTeB07ys54+x4w0BB8Th6Q5LSFk8WWUYbWIjFXVL/ymxgGMQQg+fnzmcU5LCFl8WsoNuEZECoEavp7cFNS5sM1Ig8HwNb4YhO/6TYWDmGjF4OO6F61FwWf/eJzDSkIPX+IQCgFEJINWk5uCnZb5DCY4KXg494QMpyWELF0OXRaRi0RkG7AL+ARrKfd/dnLNiyJSKiJftTqWKiKLRGSb/Z5iHxcReUpEtovIOhHpk6lsxkMIPq79VgHXfqvAaRkhiS9zGR7CmuG4VVWPw1rjcXkn17wMnH/UsTuBxao6DFhs7wNcgDW1ehhWevc+WRXKdCoaDF/j03LwqloBhIlImKouAcZ0dIGdK+HoSMaLgdn29mzgklbHX1GL5UCyiAzyQV+3aG0QWi1MZQhgrpm1nGtmdfa/yNAdfOlUPCwi8cC/gL+KSCnWaIOvZKrqPnt7P5Bpb2cDRa3K7bWP7cOPxEZGkBAdQVW9h0O1blLjzJqBgc4PTs5yWkLI4ouHcDFQC9yONalpB3BhTx5urxXp879lEZkuIitFZGVZWVlPJACQnRwDQPGhuh7fy+B/rhqXx1Xj8pyWEZL4YhBmAINU1aOqs1X1KbsJ4SsHWpoC9nupfbwYyG1VLsc+9g1U9TlVHaOqY9LT07shoS1eg3DYGARD/8YXg5AALBSRT0XkFhHJ7PSK9pkHXGdvX4e1AEzL8R/aow0TgCOtmhZ+JTvFGIRg4oq/LOOKvyxzWkZI4kscwgPAAyJyMnAF8ImI7FXVc491jYjMASYDaSKyF7gPeAx4S0RuwEqwMtUu/gHwPWA7VtPkR75Xp3tkmSZDUHHZ6TlOSwhZfMq6bFOK1RlYAXQYIaKqVx3j1DntlFXgZ93Q02NamgwlxkMICi4fk9t5IUO38CUw6aci8jFW7MAA4KZgn8fQgmkyBBfupmbcTc1OywhJfPEQcoHbVHVteydFJEVVD/WOrL4lx3QqBhXTZq0A4M0Z33JYSejhSx/CXZ0UWQwE5coZafFRRIaHcbCmkdpGD7GR3WlJGfqKK8eZJoO/6M1fvvTivfqUsDBhUHI0hRW1lByuZ2iGWUg0kJlyqulU9Be+DDt2RlDH/ZpYhOChrrGJusYmp2WEJL1pEIIaM/QYPFz/0udc/9LnTssISUyTwcYMPQYP0ybkOy0hZOnKugypHZ1X1ZbZjN+ILQgmzNBj8HDhKWZyk7/oiofQej2GPOCQvZ0M7AGOgzaGISgxE5yCh8p6NwCJ0S6HlYQenfYhqOpxqjoY+BC4UFXTVHUA1uItC/0tsK8wnYrBw02zV3LT7JVOywhJfOlDmKCqN7XsqOo/ReS3ftDkCIOSrTSR+yvr8TQ1ExFu+lsDlR+dUeC0hJDFF4NQIiK/AV6z968BSnpfkjNERYSTkRBFaVUDB6oavB6DIfA4/0S/J9Lqt/jyb/AqIB14F3jH3j7W5KWgxAw9BgcHaxo5WNPotIyQxJfQ5YPATBGJU9XupE4LeLJTYlhbdNgMPQY4P3ltFWDmMviDLhsEEZkIzMJaAj5PRE4BZqjqT/0lrq8xk5yCg5vOCtoFxwMeX/oQfo+1etM8AFX9UkS+7RdVDtHSZNhrmgwBzbkju5usy9AZPnWlq2rRUYdCKqDcDD0GB6VV9ZRW1TstIyTxxUMospsNKiIuYCawyT+ynMEbrXio1mElho649fU1gOlD8Ae+GISbgT9grZVQjBWU5EjKM3+RPyCWyIgwdpbXsO9IHYOSzNBjIPKTyUOclhCydLnJoKrlqnqNqmaqaoaqTutmGvaAJTYygnNPyEAV/rEmZEIsQo7JwzOYPNws+OoPfMmpmC4id4vIc/Yiri+KyIv+FOcELck33l2z1yztFqCUHK4zQ8N+wpcmw1zgU6w5DSHVmdiaScenkxLrYuuBajaUVHJidpLTkgxHcfubVlpP04fQ+/hiEGJV9Q6/KQkQIiPCuPCULF5ZVsi7a4qNQQhAbj17mNMSQhZfhh3ni8j3/KYkgLj0NKvZMHdtCR6T7jvgOHNYGmcOS3NaRkjii0GYiWUU6kSkUkSqRKTSX8Kc5JScJAanxVFe3cCn28udlmM4ij0VteypMEPD/sCXUYYEVQ1T1RhVTbT3E/0pzilEhCmnZgPw91V7HVZjOJpfvf0lv3r7S6dlhCRdSaE2QlU3i0i7ay6o6urel+U8U07L5snF2/jnV/vZUVbNkHSTmj1QuP0/jndaQsjSlU7FXwDTgSfaOafA2b2qKEDISYll6phc5ny+h/9dtJWnrw7KNWhCkgmDBzgtIWTp1CCo6nT7/Tv+lxNY/Pycofx99V7eX7ePn0w6YkYcAoQdZdUAxmvzAz5NbhKRE0Vkqoj8sOXlL2GBwKCkGH5op/x+YuEWh9UYWrj7nfXc/c56p2WEJL7kQ7gPmAyMBD4ALgA+A17pzoNFZDdQhRXk5FHVMXbK9zeBAmA3MNXpBWR/+p2hzPl8D0u2lPHF7oOMLegwK72hD/j1+cOdlhCy+OIhXIa19sJ+Vf0RcArQUx/6O6o6WlXH2Pt3AotVdRjW4rF39vD+PSY1LpIb7YQcxksIDE7PT+X0fGOY/YEvBqFOVZsBj4gkAqVYS8T3JhcDs+3t2cAlvXz/bnHDWceREB3B8p0HWVUY1MtPhARb9lexZX+V0zJCEl8MwkoRSQaex1q8ZTWwrAfPVmChiKwSken2sUxV3Wdv7wfaTY0jItNFZKWIrCwrK+uBhK6RGO3i+okFAPzpo+1+f56hY+6d+xX3zv3KaRkhiXRnRp+IFACJqrqu2w8WyVbVYhHJABYBtwLzVDW5VZlDqprS0X3GjBmjK1f6f9GOgzWNnPHYR9S5m5h/65lmxMFBviw6DMApucmdlDS0ICKrWjXNj0mnHoKInHb0C0gFIo4VrNQVVLXYfi/FSu0+DjggIoPs5w7CapYEBKlxkUybkAfA00uMl+Akp+QmG2PgJ7oyytBeQFIL3QpMEpE4IExVq+zt84AHsRK4Xgc8Zr/P9fXe/uSmswYze1khCzbsZ3tpFUMzEpyW1C/ZUHIEgFFZxkvrbboSmOSPgKRM4F0RadHwuqouEJEvgLdE5AagEJjqh2d3m4zEaKaOyeG15Xv45d/W8coN48yCow7w4HsbAZMPwR90uQ9BRKKBnwJnYnkGnwLPqqqj6W/7qg+hhdLKeqb8eSnFh+s4OSeJV388nqRYYxT6EuMh+E6v9SG04hVgFPBH4E/29qvdkxe8ZCRG8+aMCeSlxrJu7xGuen65WVasjxmVlWSMgZ/wxSCcqKo3qOoS+3UTllHod+SkxPLmjAkclxbHxn2V/OczS9ldHpKr2wUkXxYd9o40GHoXXwzCahGZ0LIjIuOBvvPVA4xBSTG8OX0CIwclsqu8hkufWcqqQkejrPsNj3ywiUc+CKklQQIGX/oQNgHDgT32oTxgC+ABVFVP9ovCTujrPoSjqW7w8LO/ruaTrWVERYTx+k0TOD2/w9AJQw9piVIcPtCM8nSVrvYh+GIQ8js6r6qFXdTWqzhtEAA8Tc38+u/reGd1MWcNS+PVG8Y7qsdgOBp/dCoOU9XC1i9gcqvtfktEeBj3/WAUMa5wPt1WbuLs/cyqQjOnxF/4YhDuFZFnRCRORDJF5D3gQn8JCzaSYl1MHWNla37hs50OqwltfrtgC79dYGae+gNfDMIkYAewFisPwuuqeplfVAUpPzrjOESsZeDKqhqclhOyPHLpSTxy6UlOywhJfDEIKVjzDXYADUC+2KGGBouCtDj+44RMGpuaeXXZbqflhCxD0uNN+jQ/4YtBWA4sUNXzgbFAFvBvv6gKYlqSqby6vNB4CX5i+c4Klu8MqXWGAwZfDMK5gFtE7lXVOuB3BEBGo0BjbEEKp+QkcajWzfhHPuSKvyzj9RV7aG42C8f2Fr9ftJXfL9rqtIyQxJdhx2eAZuBsVT1BRFKAhao61p8COyMQhh2PZkdZNQ++t5GlO8pxN1mf750XjODmSUMcVhYatKzalDcg1mElwUNXhx19Wex1vKqeJiJrAFT1kIhEdlthCDMkPZ7ZPx5HZb2bd1cXc9+8DTyxcAtnDUszMfi9gDEE/sOXJoNbRMKxZjoiIulYHoPhGCRGu7huYgHTJuThblJuf3Mt9e4mp2UFPZ9tK+ezbWbNTX/gi0F4CiuzUYaIPIw19PiIX1SFGPd8bySD0+LYeqCah+ZvZN+ROrqTus5g8cePtvHHj7Y5LSMk8SmnooiMwErFLljp0h2fYRKIfQjt8WXRYf7zmaV47M7FGFc4ZwwdwO8uP4XkWNPy8oWSw3UAZCXHOKwkeOj1uQyBSrAYBID31+3j5aW72FlWQ4WdQ2HkoEReu3E8qXHGKBj8hzEIAU7RwVque/FzdpbXMDwzgdduHE96QpTTsoKCj7dYuXcnD89wWEnw4I9RBkMvkpsayxvTJ3DNrBVsOVDF5P9ZwgmDEhkxKIHT8lKYdHw6A+KNgWiPZz7eARiD4A+Mh+Aw5dUNTH9lJav3tM0AJAIn5yQzccgATs9L4dS8ZGMgbEqrrDSeGQnRDisJHkyTIcgor25g075KNpRU8u/t5azYeZDGprajut8/aRCPTDnJJHU1+IwxCEFObaOHZTsqWFl4iNWFh1hTdJhGTzPZyTH84crRjOnHq1B/uPEAAOeObHelP0M7mD6EICc2MoJzTsjknBOsH31hRQ0/n7OGL/ce4YrnlnPx6CwuGZ3NxCEDiAj3JZwk+Hn+UyvfhDEIvY/xEIKIRk8zTyzawl8++ToBy4C4SLJTYoiLjCA51sW441I5e0QG+QPiHFTqX1rS3puh2q5jmgwhzO7yGv6xtpi5a0vYdYz073mpseSkxJCeEEVmYjT5A2IpGBBHbkosaQmRxEYa57A/YQxCP0BVKayo5XCdm5oGDyWH6/hkaxmfbC2jqt7T4bUxrnA7oUsG540ayKisRIIl382Cr/YBcP6JgxxWEjwYg9CPcTc1s720mrKqBsqqGth3pI7dFbUUVtRQfKiO8ppGGj1tRzBiXOEkxbhIinGRkxLDqKxERmYlkZcaS2pcJClxLqIiwh2qUVuu+MsywKzt6AvGIBiOiapS3eBh9Z7D/N+G/SzaeKBL2Z1aDMWJWUmMGJTIiIEJZCfHEBbWt55FZb0bwCy06wPGIBi6TIuBqKz3cLi2kZ1lNWwoqWTjvkoOHKmnoqaRQ7WNNLWT9Sk2MpzUuEiSYlwkRruIj44gPiqC2MhwoiLCcUUIMa5wEqNdJMa4iI+yzsdEhpMc6yIrKYaYyMDwPEKZoB12FJHzgT8A4cAsVX3MYUkhj4iQEO0iIdpFdnIMo7KSuPCUrDZlPE3N7CirYUPJETaUVLJlfxWb91dRXt1AbWMdew/Vdfv5qXGRDEyMJi0hirT4SBKiIggPC8MVLkRGhBHtCicqIsxrVDbuO0JCtIsrx+YSHxURNH0fwUBAeQh4I0caAAAILElEQVR2ApatwH8Ae4EvgKtUdeOxrjEegrMcqXNzuLaRyjoPR+rcVDd4qGnwUNPoodHTTIOnmXp3E1X1Hirr3FTWe6hze6hpaOJgTSP7jtR508x1h2hXGDGucFzhYbjCw4hyhREdEU6s7YEkx1reS1REGJERYURFhBPtsoxMbGSL5xJBYrSLpFiXXTb0PJZg9RDGAdtVdSeAiLwBXAwc0yAYnKWlI7K7NDcrZdUNHKisp7za6gStbWyiqVlxNymNnmbqPU1eo3K41k1FdQPlNQ2UVzVS526i3t27ibsiI8KIiwwnzm7eJNrNobiocGJc4US7wokIE8LDBBGhtYMiQJh9TEQQrHkpwtflWvszveHcnJqXwhlD03p+IwLPIGQDRa329wJmocQQJixMyEyMJjPR94lKqkptYxMNnmbcTc22R2IZiJoGD4fr3ByqaaSy3k2jp7mNx1Lvbqam0UNVveXZVNa7qaxzc7j267KHat1+qHHvM/3bg0PWIHQJEZkOTAfIy8tzWI2hr3l3zV4AppyaQ1xUBHG9OAlUVWnwWAalpqGJqgY3lXUeKuvd1DZ6qGtsps7dRFNzM81Kux2tzc1Ks4JivaOKeu/f6ln0TnO9N+e1BJpBKAZyW+3n2MfaoKrPAc+B1YfQN9IMgcIbn1tO5JRTc3r93iJCtN0sGNAPF4cKNIPwBTBMRI7DMgRXAlc7K8kQaLx2o2lF+ouAMgiq6hGRW4D/wxp2fFFVNzgsyxBguPrZ7M6+JKAMAoCqfgB84LQOQ+Dyt5VWk+HyMbmdlDT4ijG1hqDj7VV7eXvVXqdlhCQBFZjUHUSkDCjspFga0B+W+ukv9YT+U9feqme+qqZ3VijoDUJXEJGVXYnSCnb6Sz2h/9S1r+tpmgwGg8GLMQgGg8FLfzEIzzktoI/oL/WE/lPXPq1nv+hDMBgMXaO/eAgGg6ELGINgMBi8hLxBEJHzRWSLiGwXkTud1tNbiEiuiCwRkY0iskFEZtrHU0VkkYhss99TnNbaG4hIuIisEZH59v5xIrLC/l7fFJGgX6RBRJJF5G0R2Swim0TkW339fYa0QbAzMD0NXACMBK4SkZHOquo1PMB/qepIYALwM7tudwKLVXUYsNjeDwVmApta7T8O/F5VhwKHgBscUdW7/AFYoKojgFOw6tun32dIGwRaZWBS1UagJQNT0KOq+1R1tb1dhfXjycaq32y72GzgEmcU9h4ikgN8H5hl7wtwNvC2XSTo6ykiScC3gRcAVLVRVQ/Tx99nqBuE9jIwZTukxW+ISAFwKrACyFTVffap/UAoLID4JPBroCVX2gDgsKq2rEYTCt/rcUAZ8JLdNJolInH08fcZ6gYh5BGReODvwG2qWtn6nFpjykE9riwiPwBKVXWV01r8TARwGvCMqp4K1HBU86Avvs9QNwhdysAUrIiIC8sY/FVV37EPHxCRQfb5QUCpU/p6iTOAi0RkN1aT72ystnayiLRM3w+F73UvsFdVV9j7b2MZiD79PkPdIHgzMNm90FcC8xzW1CvY7egXgE2q+r+tTs0DrrO3rwPm9rW23kRV71LVHFUtwPr+PlLVa4AlwGV2sVCo536gSESG24fOwco23qffZ8hHKorI97DaoC0ZmB52WFKvICJnAp8C6/m6bX03Vj/CW0Ae1rTwqap60BGRvYyITAZ+qao/EJHBWB5DKrAGmKaqna9HF8CIyGisjtNIYCfwI6x/2n32fYa8QTAYDF0n1JsMBoPBB4xBMBgMXoxBMBgMXoxBMBgMXoxBMAQ1IjJZRCY6rSNUMAbBEOxMBoxB6CWMQQgBRKTAni77vD0VeqGIxByj7FAR+VBEvhSR1SIyRCz+R0S+EpH1InKFXXayiHwiInNFZKeIPCYi14jI53a5IXa5l0XkWRFZKSJb7XBjRCRaRF6yy64Rke/Yx68XkXdEZIE9rfe3rfSdJyLLbG1/s0OzEZHdIvKAfXy9iIyw53DcDNwuImtF5CwRudyux5ci8i9/fu4hiaqaV5C/gAKs6dCj7f23sAJ12iu7Aphib0cDscB/AouwgrcygT3AIKz/voft7Sis8OAH7GtnAk/a2y8DC7D+wQzDCsONBv4LKxgMYIR932jgeqzAmyR7vxArxDwN+BcQZ19zB3Cvvb0buNXe/ikwy96+HytYqaV+64FsezvZ6e8m2F7GQwgddqnqWnt7FZaRaIOIJGD9sbwLoKr1qloLnAnMUdUmVT0AfAKMtS/7Qq2p1g3ADmChfXz9Uc94S1WbVXUb1h/7CPu+r9nP2oz1h3+8XX6xqh5R1XqsEN18rLwOI4F/i8harFDd/FbPaJmv0W79bP4NvCwiN2EZOIMPBNzajoZu0zpstwlot8nQw/s2t9pvpu3v5+iQ185CYI/WGwEIsEhVr+rkmpby30BVbxaR8Vj5E1aJyOmqWtGJFoON8RD6EWolUtkrIpcAiEiUiMRizYm4wk5Tlo6VqONzH29/uYiE2f0Kg4Et9n2vsZ91PFY8/pYO7rEcOENEhtrXxNnXdUQVkNCyIyJDVHWFqt6LlV/ArAjrA8Yg9D+uBX4uIuuApcBA4F1gHfAl8BHwa7Vm3/nCHiwj8k/gZrsp8GcgTETWA28C12sHE5BUtQyrf2GOrW8ZVtOjI94DprR0KgL/Y3c6fmXX70sf69GvMZObDD1GRF4G5qvq252VNQQ2xkMwGAxejIcQoojI01jZhlrzB1V9yQk9huDAGASDweDFNBkMBoMXYxAMBoMXYxAMBoMXYxAMBoMXYxAMBoMXYxAMBoOX/w+zPLejCJyRbgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "% matplotlib inline\n", "\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn import linear_model, decomposition, datasets\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.model_selection import GridSearchCV\n", "\n", "logistic = linear_model.LogisticRegression()\n", "\n", "pca = decomposition.PCA()\n", "pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])\n", "\n", "digits = datasets.load_digits()\n", "X_digits = digits.data\n", "y_digits = digits.target\n", "\n", "# Plot the PCA spectrum\n", "pca.fit(X_digits)\n", "\n", "plt.figure(1, figsize=(4, 3))\n", "plt.clf()\n", "plt.axes([.2, .2, .7, .7])\n", "plt.plot(pca.explained_variance_, linewidth=2)\n", "plt.axis('tight')\n", "plt.xlabel('n_components')\n", "plt.ylabel('explained_variance_')\n", "\n", "# Prediction\n", "n_components = [20, 40, 64]\n", "Cs = np.logspace(-4, 4, 3)\n", "\n", "# Parameters of pipelines can be set using ‘__’ separated parameter names:\n", "estimator = GridSearchCV(pipe,\n", " dict(pca__n_components=n_components,\n", " logistic__C=Cs))\n", "estimator.fit(X_digits, y_digits)\n", "\n", "plt.axvline(estimator.best_estimator_.named_steps['pca'].n_components,\n", " linestyle=':', label='n_components chosen')\n", "plt.legend(prop=dict(size=12))\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1797, 64)\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP4AAAECCAYAAADesWqHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAC8tJREFUeJzt3X+o1fUdx/HXazetlpK2WoRGZgwhguUPZFHEphm2wv2zRKFgsaF/bJFsULZ/Rv/1V7Q/RiBWCzKjawkjtpaSEUGr3Wu2TG2UGCnVLTTM/lCy9/44X4eJ637v3f187jnn/XzAwXO9x/P63Ht9ne/3e+73nLcjQgBy+c5kLwBAfRQfSIjiAwlRfCAhig8kRPGBhLqi+LaX237X9nu21xfOesz2iO3dJXNOy7vc9g7be2y/Y/uewnnn2X7D9ltN3gMl85rMAdtv2n6+dFaTd8D227Z32R4qnDXD9hbb+2zvtX1dwax5zdd06nLU9roiYRExqRdJA5LelzRX0lRJb0m6umDejZIWSNpd6eu7TNKC5vp0Sf8u/PVZ0rTm+hRJr0v6UeGv8beSnpL0fKXv6QFJF1fKekLSr5rrUyXNqJQ7IOljSVeUuP9u2OIvlvReROyPiBOSnpb0s1JhEfGKpMOl7v8seR9FxM7m+heS9kqaVTAvIuJY8+GU5lLsLC3bsyXdKmljqYzJYvtCdTYUj0pSRJyIiM8rxS+V9H5EfFDizruh+LMkfXjaxwdVsBiTyfYcSfPV2QqXzBmwvUvSiKRtEVEy72FJ90r6umDGmULSi7aHba8pmHOlpE8lPd4cymy0fUHBvNOtkrS51J13Q/FTsD1N0rOS1kXE0ZJZEXEyIq6VNFvSYtvXlMixfZukkYgYLnH/3+KGiFgg6RZJv7Z9Y6Gcc9Q5LHwkIuZL+lJS0eegJMn2VEkrJA2WyuiG4h+SdPlpH89u/q5v2J6iTuk3RcRztXKb3dIdkpYXirhe0grbB9Q5RFti+8lCWf8VEYeaP0ckbVXncLGEg5IOnrbHtEWdB4LSbpG0MyI+KRXQDcX/p6Qf2L6yeaRbJekvk7ymCWPb6hwj7o2IhyrkXWJ7RnP9fEnLJO0rkRUR90fE7IiYo87P7aWIuKNE1im2L7A9/dR1STdLKvIbmoj4WNKHtuc1f7VU0p4SWWdYrYK7+VJnV2ZSRcRXtn8j6e/qPJP5WES8UyrP9mZJP5Z0se2Dkv4QEY+WylNnq3inpLeb425J+n1E/LVQ3mWSnrA9oM4D+zMRUeXXbJVcKmlr5/FU50h6KiJeKJh3t6RNzUZpv6S7CmadejBbJmlt0ZzmVwcAEumGXX0AlVF8ICGKDyRE8YGEKD6QUFcVv/Dpl5OWRR553ZbXVcWXVPObW/UHSR553ZTXbcUHUEGRE3hs9/VZQTNnzhzzvzl+/LjOPffcceXNmjX2FysePnxYF1100bjyjh4d+2uIjh07pmnTpo0r79Chsb80IyLUnL03ZidPnhzXv+sVETHqN2bST9ntRTfddFPVvAcffLBq3vbt26vmrV9f/AVv33DkyJGqed2IXX0gIYoPJETxgYQoPpAQxQcSovhAQhQfSIjiAwm1Kn7NEVcAyhu1+M2bNv5Jnbf8vVrSattXl14YgHLabPGrjrgCUF6b4qcZcQVkMWEv0mneOKD2a5YBjEOb4rcacRURGyRtkPr/ZblAr2uzq9/XI66AjEbd4tcecQWgvFbH+M2ct1Kz3gBUxpl7QEIUH0iI4gMJUXwgIYoPJETxgYQoPpAQxQcSYpLOONSebDN37tyqeeMZEfb/OHz4cNW8lStXVs0bHBysmtcGW3wgIYoPJETxgYQoPpAQxQcSovhAQhQfSIjiAwlRfCAhig8k1GaE1mO2R2zvrrEgAOW12eL/WdLywusAUNGoxY+IVyTVfRUFgKI4xgcSYnYekNCEFZ/ZeUDvYFcfSKjNr/M2S3pN0jzbB23/svyyAJTUZmjm6hoLAVAPu/pAQhQfSIjiAwlRfCAhig8kRPGBhCg+kBDFBxLqi9l5CxcurJpXe5bdVVddVTVv//79VfO2bdtWNa/2/xdm5wHoChQfSIjiAwlRfCAhig8kRPGBhCg+kBDFBxKi+EBCFB9IqM2bbV5ue4ftPbbfsX1PjYUBKKfNufpfSfpdROy0PV3SsO1tEbGn8NoAFNJmdt5HEbGzuf6FpL2SZpVeGIByxnSMb3uOpPmSXi+xGAB1tH5Zru1pkp6VtC4ijp7l88zOA3pEq+LbnqJO6TdFxHNnuw2z84De0eZZfUt6VNLeiHio/JIAlNbmGP96SXdKWmJ7V3P5aeF1ASiozey8VyW5wloAVMKZe0BCFB9IiOIDCVF8ICGKDyRE8YGEKD6QEMUHEuqL2XkzZ86smjc8PFw1r/Ysu9pqfz/BFh9IieIDCVF8ICGKDyRE8YGEKD6QEMUHEqL4QEIUH0iI4gMJtXmX3fNsv2H7rWZ23gM1FgagnDbn6h+XtCQijjXvr/+q7b9FxD8Krw1AIW3eZTckHWs+nNJcGJgB9LBWx/i2B2zvkjQiaVtEMDsP6GGtih8RJyPiWkmzJS22fc2Zt7G9xvaQ7aGJXiSAiTWmZ/Uj4nNJOyQtP8vnNkTEoohYNFGLA1BGm2f1L7E9o7l+vqRlkvaVXhiActo8q3+ZpCdsD6jzQPFMRDxfdlkASmrzrP6/JM2vsBYAlXDmHpAQxQcSovhAQhQfSIjiAwlRfCAhig8kRPGBhJidNw7bt2+vmtfvav/8jhw5UjWvG7HFBxKi+EBCFB9IiOIDCVF8ICGKDyRE8YGEKD6QEMUHEqL4QEKti98M1XjTNm+0CfS4sWzx75G0t9RCANTTdoTWbEm3StpYdjkAami7xX9Y0r2Svi64FgCVtJmkc5ukkYgYHuV2zM4DekSbLf71klbYPiDpaUlLbD955o2YnQf0jlGLHxH3R8TsiJgjaZWklyLijuIrA1AMv8cHEhrTW29FxMuSXi6yEgDVsMUHEqL4QEIUH0iI4gMJUXwgIYoPJETxgYQoPpBQX8zOqz0LbeHChVXzaqs9y67293NwcLBqXjdiiw8kRPGBhCg+kBDFBxKi+EBCFB9IiOIDCVF8ICGKDyRE8YGEWp2y27y19heSTkr6irfQBnrbWM7V/0lEfFZsJQCqYVcfSKht8UPSi7aHba8puSAA5bXd1b8hIg7Z/r6kbbb3RcQrp9+geUDgQQHoAa22+BFxqPlzRNJWSYvPchtm5wE9os203AtsTz91XdLNknaXXhiActrs6l8qaavtU7d/KiJeKLoqAEWNWvyI2C/phxXWAqASfp0HJETxgYQoPpAQxQcSovhAQhQfSIjiAwlRfCAhR8TE36k98Xf6LebOnVszTkNDQ1Xz1q5dWzXv9ttvr5pX++e3aFF/v5wkIjzabdjiAwlRfCAhig8kRPGBhCg+kBDFBxKi+EBCFB9IiOIDCVF8IKFWxbc9w/YW2/ts77V9XemFASin7UCNP0p6ISJ+bnuqpO8WXBOAwkYtvu0LJd0o6ReSFBEnJJ0ouywAJbXZ1b9S0qeSHrf9pu2NzWCNb7C9xvaQ7bovXQMwZm2Kf46kBZIeiYj5kr6UtP7MGzFCC+gdbYp/UNLBiHi9+XiLOg8EAHrUqMWPiI8lfWh7XvNXSyXtKboqAEW1fVb/bkmbmmf090u6q9ySAJTWqvgRsUsSx+5An+DMPSAhig8kRPGBhCg+kBDFBxKi+EBCFB9IiOIDCfXF7Lza1qxZUzXvvvvuq5o3PDxcNW/lypVV8/ods/MAnBXFBxKi+EBCFB9IiOIDCVF8ICGKDyRE8YGEKD6Q0KjFtz3P9q7TLkdtr6uxOABljPqeexHxrqRrJcn2gKRDkrYWXheAgsa6q79U0vsR8UGJxQCoY6zFXyVpc4mFAKindfGb99RfIWnwf3ye2XlAj2g7UEOSbpG0MyI+OdsnI2KDpA1S/78sF+h1Y9nVXy1284G+0Kr4zVjsZZKeK7scADW0HaH1paTvFV4LgEo4cw9IiOIDCVF8ICGKDyRE8YGEKD6QEMUHEqL4QEIUH0io1Oy8TyWN5zX7F0v6bIKX0w1Z5JFXK++KiLhktBsVKf542R6KiEX9lkUeed2Wx64+kBDFBxLqtuJv6NMs8sjrqryuOsYHUEe3bfEBVEDxgYQoPpAQxQcSovhAQv8BVOSY4UmSu60AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Compare the performance\n", "from sklearn.datasets import load_digits\n", "from sklearn.linear_model.logistic import LogisticRegression\n", "from sklearn import decomposition\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import accuracy_score\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "# load digital data\n", "digits, dig_label = load_digits(return_X_y=True)\n", "print(digits.shape)\n", "\n", "# draw one digital\n", "plt.gray() \n", "plt.matshow(digits[0].reshape([8, 8])) \n", "plt.show() \n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy train = 0.998608, accuracy_test = 0.897222\n" ] } ], "source": [ "\n", "# calculate train/test data number\n", "N = len(digits)\n", "N_train = int(N*0.8)\n", "N_test = N - N_train\n", "\n", "# split train/test data\n", "x_train = digits[:N_train, :]\n", "y_train = dig_label[:N_train]\n", "x_test = digits[N_train:, :]\n", "y_test = dig_label[N_train:]\n", "\n", "# do logistic regression\n", "lr=LogisticRegression()\n", "lr.fit(x_train,y_train)\n", "\n", "pred_train = lr.predict(x_train)\n", "pred_test = lr.predict(x_test)\n", "\n", "# calculate train/test accuracy\n", "acc_train = accuracy_score(y_train, pred_train)\n", "acc_test = accuracy_score(y_test, pred_test)\n", "print(\"accuracy train = %f, accuracy_test = %f\" % (acc_train, acc_test))\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy train = 0.987474, accuracy_test = 0.894444\n" ] } ], "source": [ "# do PCA with 'n_components=40'\n", "pca = decomposition.PCA(n_components=40)\n", "pca.fit(x_train)\n", "\n", "x_train_pca = pca.transform(x_train)\n", "x_test_pca = pca.transform(x_test)\n", "\n", "# do logistic regression\n", "lr=LogisticRegression()\n", "lr.fit(x_train_pca,y_train)\n", "\n", "pred_train = lr.predict(x_train_pca)\n", "pred_test = lr.predict(x_test_pca)\n", "\n", "# calculate train/test accuracy\n", "acc_train = accuracy_score(y_train, pred_train)\n", "acc_test = accuracy_score(y_test, pred_test)\n", "print(\"accuracy train = %f, accuracy_test = %f\" % (acc_train, acc_test))\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## References\n", "* [Pipelining: chaining a PCA and a logistic regression](http://scikit-learn.org/stable/auto_examples/plot_digits_pipe.html)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" }, "main_language": "python" }, "nbformat": 4, "nbformat_minor": 2 }