|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Linear Models\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This example uses the only the first feature of the diabetes dataset, in order to illustrate a two-dimensional plot of this regression technique. The straight line can be seen in the plot, showing how linear regression attempts to draw a straight line that will best minimize the residual sum of squares between the observed responses in the dataset, and the responses predicted by the linear approximation.\n",
- "\n",
- "The coefficients, the residual sum of squares and the variance score are also calculated.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "%matplotlib inline\n",
- "\n",
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "from sklearn import datasets, linear_model\n",
- "from sklearn.metrics import mean_squared_error, r2_score\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Load the diabetes dataset\n",
- "diabetes = datasets.load_diabetes()\n",
- "\n",
- "# Use only one feature\n",
- "diabetes_X = diabetes.data[:, np.newaxis, 2]\n",
- "\n",
- "# Split the data into training/testing sets\n",
- "diabetes_X_train = diabetes_X[:-20]\n",
- "diabetes_X_test = diabetes_X[-20:]\n",
- "\n",
- "# Split the targets into training/testing sets\n",
- "diabetes_y_train = diabetes.target[:-20]\n",
- "diabetes_y_test = diabetes.target[-20:]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Coefficients: \n",
- " [938.23786125]\n",
- "Mean squared error: 2548.07\n",
- "Variance score: 0.47\n"
- ]
- }
- ],
- "source": [
- "# Create linear regression object\n",
- "regr = linear_model.LinearRegression()\n",
- "\n",
- "# Train the model using the training sets\n",
- "regr.fit(diabetes_X_train, diabetes_y_train)\n",
- "\n",
- "# Make predictions using the testing set\n",
- "diabetes_y_pred = regr.predict(diabetes_X_test)\n",
- "\n",
- "# The coefficients\n",
- "print('Coefficients: \\n', regr.coef_)\n",
- "\n",
- "# The mean squared error\n",
- "print(\"Mean squared error: %.2f\"\n",
- " % mean_squared_error(diabetes_y_test, diabetes_y_pred))\n",
- "\n",
- "# Explained variance score: 1 is perfect prediction\n",
- "print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred))\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAADuCAYAAAAOR30qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEGRJREFUeJzt3W+MXFX9x/HPnf7RHaC1UFBjmXuRWKlFEFir8RcV/+H/JwY1cawx/pkHBEIkoUYm0WgyxOojIfgzQ41R9z5RiSZiTEqtxJhodCskFmEJkblbNJi2gm0zXfpnrw+Os9t2d+be2+6de+6571fSB52ebb6bhU++/Z5zz/XiOBYAoHi1ogsAABgEMgBYgkAGAEsQyABgCQIZACxBIAOAJQhkALAEgQwAliCQAcASq7Ms3rhxYxwEQU6lAICb9u3bdyiO48uT1mUK5CAIND09ff5VAUAFeZ4XpVnHyAIALEEgA4AlCGQAsASBDACWIJABwBIEMgCnhWGoIAhUq9UUBIHCMCy6pKEyHXsDgDIJw1CtVkv9fl+SFEWRWq2WJKnZbBZZ2rLokAE4q91uL4TxQL/fV7vdLqii0QhkAM6anZ3N9HnRCGQAzmo0Gpk+LxqBDMBZnU5H9Xr9rM/q9bo6nU5BFY1GIANwVrPZVLfble/78jxPvu+r2+1auaEnSV4cx6kXT05OxlwuBADZeJ63L47jyaR1dMgAYAkCGQAsQSADgCUIZACwBIEMAJYgkAHAEgQyAFiCQAYASxDIAGAJAhkALEEgA4AlCGQAsASBDACWIJABwBIEMgBYgkAGAEsQyABgCQIZACxBIAOAJQhkALAEgQwAliCQAcASBDIAWIJABgBLEMgAYAkCGQAsQSADgCUIZACwBIEMAJYgkAHAEgQyAFiCQAYASxDIAGAJAhkALEEgA4AlCGQAsASBDACWIJABwBIEMgBYgkAGAEsQyABgCQIZACxBIAOAJQhkALAEgQwAliCQAcASBDIAWIJABgBLEMgAYAkCGYCznn9euuEGyfOka66RpqeLrmg0AhmAlcIwVBAEqtVqCoJAYRim/tpf/tKE8KtfLT3+uPlsZkb60Y9yKnaFrC66AAA4VxiGarVa6vf7kqQoitRqtSRJzWZz2a85cUK67Tbp+98f/vcO+VJreHEcp148OTkZT9ve8wMovSAIFEXRks9931ev1zvrs6eekt72NumFF4b/fVdfLe3dKzUaK1xoSp7n7YvjeDJpHSMLANaZnZ1N/Px73zNjiS1bhofx3XdLp05JzzxTXBhnwcgCgHUajcayHfKmTVt1yy3SI4+M/vpHH5Xe+c58assTHTIA63Q6HdXr9TM++T9JsQ4c+OvQMH73u02nHMflDGOJDhmAhZrNpubnPX3hC1t14sT1I9fef790++1jKixnBDIAqzz5pPSGN0jSp4auWbdO+sMfBuvcwcgCgBW+/nWzSTcqZD/7WWluTvrPf9wLY4kOGUCBjh2TNm6UXnpp9LpvflP68pfHU1OR6JABC13IU2pl8JvfmG74kktGh/HMjNmkq0IYSwQyYJ3BU2pRFCmO44Wn1MoeynEsfeITJojf+97h697xDun0abN+8+bx1WcDntQDLJPlKbUy+Mc/pE2bktf99KfSrbfmX08ReFIPKKk0T6mVwa5dphtOCuNDh0w37GoYZ0EgA5ZpDHnGd9jnNjl50lxz6XnSF784fN1tt5kQjmPpssvGV5/tCGTAMkufUpPq9bo6nU5BFSV77DETwmvXmo24Yf74RxPCDzwwvtrKhEAGLNNsNtXtduX7vjzPk+/76na7Q6+dLNLdd5sgvvHG4WsaDXN2OI6lt7xlfLWVEZt6ADJ58UVpw4bkdffdJ91xR/71lEHaTT0eDAGQysMPSx/9aPK6Z5+VgiD3cpzEyALAUHEsffCDZiwxKow//GFpft6sJ4zPHx0ygCV6Pemqq5LXPfywCWOsDDpkAAvuu890w0lh/OKLphsmjFcWgQxU3LFjJoQ9T7rzzuHrduxYPDu8fv346qsSAhmoqB//ePGCn1Eee8yE8M6d46mrypghAxWzZo158ecoW7eaIF6zZjw1waBDBirg2WcXxxKjwnjXLtMN799PGBeBQAYcdtddJoRf+9rR6/bvN0H8+c+Ppy4sj5EF4JhTp9J3t/PzJrBhBzpkwBGPPmrCNSmMv/OdxdMShLFd6JCBktu2Tfrzn5PXHTrEVZe2I5CBEnrhBenSS5PXXX+99Pjj+deDlcHIAiiR737XjBmSwnjPHjOSIIzLhQ4ZsFwcS7WUrdPJk9Jq/q8uLTpkwFJPPmm64aQwvuOOxU06wrjc+PEBlrnqKnPbWpJnnpGuvjr3cjBGBDJggePHpXNeozdUhpf8oGQYWQAFGmzSJYXxD36wOJaAu+iQgQKkfSDj8OF0x9vgBjrkc4RhqCAIVKvVFASBwjAsuiQ4otdbvOAnyaAbJoyrhUA+QxiGarVaiqJIcRwriiK1Wi1CGRfkk59M9xaOX/yCsUTVeXGGn/7k5GQ8PT2dYznFCoJAURQt+dz3ffXSbHsD/5Pl7PCpU9KqVfnWg2J5nrcvjuPJpHV0yGeYnZ3N9DncdCFjq927050d/sAHFrthwhgDbOqdodFoLNshNxqNAqpBEQZjq36/L0kLYytJajabQ79uYkKam0v++2dmpM2bV6RUOIgO+QydTkf1c84f1et1dTqdgirCuLXb7YUwHuj3+2q320vWHjmyuEmXFMaDbpgwxigE8hmazaa63a5835fnefJ9X91ud2RnBLekGVvde68J4aQ3L+/cySYdsiGQz9FsNtXr9TQ/P69er0cYV8yw8VSj0Vjohpdpls9y9KgJ4R07cigwBxz1tAeBDJxh6djqGkmxoqg38ute8YrFbvjii/OscGVx1NMuHHsDzhGGoT73uS06ceLGxLV790rvetcYisoJRz3HI+2xN05ZAP+z+HLQ5DGVKy8H5ainXRhZoPIeeCDdy0G3b3fv5aCjZuYYPzpkVFbaUJ2dla68Mt9aitLpdM46dy1x1LNIdMiolH/+M/sFP66GscRRT9sQyKiEj3zEhPBrXjN63Ve/Wr2zwxz1tAcjCzgt7Vii3zePPwNFokOGc37+8+xjCcIYNqBDhjPSdsO7d0vve1++tQDng0BGqfX70kUXpVtbpbkwyomRBUqp1TIdcVIY+371NulQXnTIKJW0Y4m//z35lUmAbeiQYb0nnsi+SUcYo4wIZFhrEMLXXjt63Ve+wlgCbiCQC8Q9tEsN7olI0w2/9JJZf++9+dcFjAOBXBDuoT3bt76V7uWg0mI3vHZt/nUB48R9yAXhHloj7Sbdnj3Se96Tby1AXrgP2XJVvof24EHpiivSrWUujCphZFGQKt5D+8Y3mo44KYxf+Uo26VBNBHJBlr67zd17aAebdPv3j1733HMmhJ9/fjx1AbYhkAvi+j20e/ZkPzucdDUm4Do29bCi0m7S3XOP5OA/BoBlsamHsVl8OWi6tatW5VsPUFaMLHDe7ror3ctBpcWxBGEMDEeHjMzSjiV+9zvp7W/PtxbAJQQyUun10l/Yw3E14PwwssBIN9xgOuKkMN62jbPDwIWiQ8ay0o4l/v1vacOGfGsBqoIOGQt+/evsZ4cJY2DlEMhYCOEPfShp5Xb5fqCpqWreSAfkjZFFRc3NSRMT6dZOTFyk48f7kqQoklqtliQ581QhYAs65Ir50pdMN5wUxhs2mJGE7wcLYTzQ7/fVbrdzrBKoJjrkiki7STczI23evPj7Kl8TCowbHbLDnn46+ybdmWEsVfOaUKAoBLKDLrvMhPDrXz963Z13Jp8drtI1oUDRGFk4Io7TvY9Oko4fl17+8nRrBxt37XZbs7OzajQa6nQ6bOgBOeD6zZKbmpK2b0+3lqfogGJw/abj0m7S/epXac4XA7ABM+SSCMNQjca1mTfpCGOgPAjkEnjrWyN9+tNNHTgw+qV0113HBT9AmTGysNhiJ+yPXHfggLRpU+7lAMgZHbJl9u1Lf3bY82qKY8IYcAWBbIlBCE8m7sPeI8mT5PFwBuAYRhYFmp9P/465iYl1On786MLveTgDcA8dcgF27zbdcJowHmzSPfjg/8v3fXmeJ9/31e12eTgDcAyBPEYve5kJ4ve/f/S63/9+6WmJZrOpXq+n+fl59Xo9wjhBGIYKgkC1Wk1BECgMucMZ9mNkkbMjR6T169Ot5bjaygjDUK1WS/3+4A7niDucUQp0yDnpdEw3nBTG3/42Z4dXWrvdXgjjAe5wRhnQIa+wtI80Hz0qXXxxvrVUFXc4o6zokFfA3/6W7uzwpZcudsOEcX64wxllRSBfgJtvNiG8devodXv3mhA+fHgsZa24sm2QcYczyoqRRUanTklr1qRbOz+ffoRhqzJukHGHM8qK+5BT+tnPpI9/PHndZz4j/fCH+dczLkEQKIqiJZ/7vq9erzf+goAS4j7kFZK2w3X1gh82yIDxYYa8jIMHs78c1MUwltggA8aJQD7Dgw+aEL7iitHrdu2qztlhNsiA8WFkofRjibk58/hzlbBBBoxPZTf1/vUv6VWvSl63ZYs5ZwwA5yvtpl7lRhZTU6YjTgrjmRkzkrAtjMt2JhhAepUYWZw+LW3bJv3lL8lrbZ4Ll/FMMID0nO6Qn3jCdMOrV48O46mpYjfp0na9XJoDuM3JDvlrX5O+8Y3RazZulGZnpYmJ8dQ0TJaulzPBgNuc6ZCPHZPWrjUd8agw3rnTdMIHDxYfxlK2rpczwYDbSh/IjzxiQviSS6STJ4eve/ppE8Q7doyvtjSydL2cCQbcVspAjmPp1ltNEN9yy/B1N99sNvTiWHrd68ZWXiZZut5ms6lut8u79QBHlSqQn3vOhHCtJj300PB1Dz1kQvi3vzVrbZa16+XdeoC7LI8ro9s1QXzllaPXHT5sgvhjHxtPXSuBrhfAgNVP6s3NJW+83X67dP/946kHAM6HE9dv/uQnw//sT3+S3vzm8dUCAHmzOpDf9CZp3TrpyBHz+yCQnnqqehf8AKgGqwP5uuvMwxsnTkiXX150NQCQL6sDWZLWry+6AgAYj1KcsgCAKiCQAcASlQ5k7hYGYBPrZ8h54W5hALapbIfM3cIAbFPZQOZuYQC2qWwgc7dweTH7h6sqG8iu3C1ctXAazP6jKFIcxwuzf9e/b1REHMepf910002xS6ampmLf92PP82Lf9+OpqamiS8pkamoqrtfrsaSFX/V6feT3Ufbv2ff9s77fwS/f94suDRhK0nScImOtvu0NowVBoCiKlnzu+756vd6Sz889WSKZfxWU6brPWq2m5f6b9TxP8/PzBVQEJEt721tlRxYuyLox6cLJEmb/cBmBXGJZw8mFkyWuzP6B5RDIJZY1nFzoLnnDClxGIJdY1nBypbvkvYJwVSkCuWpHu7LIEk50l4DdrD9l4cLJAADV5swpCxdOBgBAGtYHsgsnAwAgDesD2YWTAQCQhvWB7MrJAABIYnUgh2G4MENetWqVJHEyoCI4WYMqsvaNIeeerjh9+vRCZ0wYu423uaCqrD32lvXiHLiDnz1cU/pjb5yuqC5+9qgqawOZ0xXVxc8eVWVtIHO6orr42aOqrA1k7l2oLn72qCprN/UAwBWl39QDgKohkAHAEgQyAFiCQAYASxDIAGCJTKcsPM87KGnpM60AgFH8OI4vT1qUKZABAPlhZAEAliCQAcASBDIAWIJABgBLEMgAYAkCGQAsQSADgCUIZACwBIEMAJb4L/4/ciktfwZ6AAAAAElFTkSuQmCC\n",
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Plot outputs\n",
- "plt.scatter(diabetes_X_test, diabetes_y_test, color='black')\n",
- "plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)\n",
- "\n",
- "plt.xticks(())\n",
- "plt.yticks(())\n",
- "\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## References\n",
- "* [Generalized Linear Models](http://scikit-learn.org/stable/modules/linear_model.html)\n",
- "* [Linear Regression Example](http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html#sphx-glr-auto-examples-linear-model-plot-ols-py)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|