|
@@ -0,0 +1,740 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 9,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>instant</th>\n",
|
|
|
+ " <th>dteday</th>\n",
|
|
|
+ " <th>season</th>\n",
|
|
|
+ " <th>yr</th>\n",
|
|
|
+ " <th>mnth</th>\n",
|
|
|
+ " <th>hr</th>\n",
|
|
|
+ " <th>holiday</th>\n",
|
|
|
+ " <th>weekday</th>\n",
|
|
|
+ " <th>workingday</th>\n",
|
|
|
+ " <th>weathersit</th>\n",
|
|
|
+ " <th>temp</th>\n",
|
|
|
+ " <th>atemp</th>\n",
|
|
|
+ " <th>hum</th>\n",
|
|
|
+ " <th>windspeed</th>\n",
|
|
|
+ " <th>casual</th>\n",
|
|
|
+ " <th>registered</th>\n",
|
|
|
+ " <th>cnt</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>0</th>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>2011-01-01</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>6</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0.24</td>\n",
|
|
|
+ " <td>0.2879</td>\n",
|
|
|
+ " <td>0.81</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>3</td>\n",
|
|
|
+ " <td>13</td>\n",
|
|
|
+ " <td>16</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>1</th>\n",
|
|
|
+ " <td>2</td>\n",
|
|
|
+ " <td>2011-01-01</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>6</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0.22</td>\n",
|
|
|
+ " <td>0.2727</td>\n",
|
|
|
+ " <td>0.80</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>8</td>\n",
|
|
|
+ " <td>32</td>\n",
|
|
|
+ " <td>40</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2</th>\n",
|
|
|
+ " <td>3</td>\n",
|
|
|
+ " <td>2011-01-01</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>2</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>6</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0.22</td>\n",
|
|
|
+ " <td>0.2727</td>\n",
|
|
|
+ " <td>0.80</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>5</td>\n",
|
|
|
+ " <td>27</td>\n",
|
|
|
+ " <td>32</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>3</th>\n",
|
|
|
+ " <td>4</td>\n",
|
|
|
+ " <td>2011-01-01</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>3</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>6</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0.24</td>\n",
|
|
|
+ " <td>0.2879</td>\n",
|
|
|
+ " <td>0.75</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>3</td>\n",
|
|
|
+ " <td>10</td>\n",
|
|
|
+ " <td>13</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>4</th>\n",
|
|
|
+ " <td>5</td>\n",
|
|
|
+ " <td>2011-01-01</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>4</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>6</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>0.24</td>\n",
|
|
|
+ " <td>0.2879</td>\n",
|
|
|
+ " <td>0.75</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " instant dteday season yr mnth hr holiday weekday workingday \\\n",
|
|
|
+ "0 1 2011-01-01 1 0 1 0 0 6 0 \n",
|
|
|
+ "1 2 2011-01-01 1 0 1 1 0 6 0 \n",
|
|
|
+ "2 3 2011-01-01 1 0 1 2 0 6 0 \n",
|
|
|
+ "3 4 2011-01-01 1 0 1 3 0 6 0 \n",
|
|
|
+ "4 5 2011-01-01 1 0 1 4 0 6 0 \n",
|
|
|
+ "\n",
|
|
|
+ " weathersit temp atemp hum windspeed casual registered cnt \n",
|
|
|
+ "0 1 0.24 0.2879 0.81 0 3 13 16 \n",
|
|
|
+ "1 1 0.22 0.2727 0.80 0 8 32 40 \n",
|
|
|
+ "2 1 0.22 0.2727 0.80 0 5 27 32 \n",
|
|
|
+ "3 1 0.24 0.2879 0.75 0 3 10 13 \n",
|
|
|
+ "4 1 0.24 0.2879 0.75 0 0 1 1 "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 9,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "import pandas\n",
+ "\n",
+ "# Relative path: resolved against the kernel's working directory.\n",
+ "bike_rentals = pandas.read_csv(filepath_or_buffer=\"bike_rental_hour.csv\")\n",
+ "bike_rentals.head(5)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "(array([ 6972., 3705., 2659., 1660., 987., 663., 369., 188.,\n",
|
|
|
+ " 139., 37.]),\n",
|
|
|
+ " array([ 1. , 98.6, 196.2, 293.8, 391.4, 489. , 586.6, 684.2,\n",
|
|
|
+ " 781.8, 879.4, 977. ]),\n",
|
|
|
+ " <a list of 10 Patch objects>)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "image/png": [
|
|
|
+ "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
|
|
|
+ "AAALEgAACxIB0t1+/AAAFD5JREFUeJzt3V+MnNd93vHvI1FMaFsVK7igKJGFiWJViEVT22xFN7UR\n",
|
|
|
+ "OnVVxkgpXUkyUIJIiNywjt0WSCz6ouJV6gRoYxmFdBH/ESXYTFmnIWhUkEUrXjRAAa+dSA0jipXY\n",
|
|
|
+ "lqi4CZdu40hJilak+OvFnD2cEhR3lxzujDTfDzDgec97zsx5D7nz8D3vOzupKiRJArhp3AOQJE0O\n",
|
|
|
+ "Q0GS1BkKkqTOUJAkdYaCJKkzFCRJ3ZKhkOSvJ3lh6PF6ks8kuT3JsSSvJHkuyfqhPvuTvJrkZJL7\n",
|
|
|
+ "huq3JTne9j12ow5KknRtspLPKSS5CZgH7gV+EfifVfVrST4H/OWqeiTJVuAbwN8B7gK+A8xUVSWZ\n",
|
|
|
+ "Az5dVXNJngG+VFXPjviYJEnXaKXLR58ATlXVa8Au4GCrPwg80Mr3A4eq6nxVnQZOAduTbARuraq5\n",
|
|
|
+ "1u6poT6SpAmw0lB4GDjUyhuqaqGVF4ANrXwncGaozxkGZwyX18+3eknShFh2KCRZC/wj4N9dvq8G\n",
|
|
|
+ "a1D+vgxJeodbs4K2PwP8XlX9sG0vJLmjqs62paFzrX4e2DzUbxODM4T5Vh6un7/8RZIYLpK0QlWV\n",
|
|
|
+ "UTzPSkLhU1xaOgI4CuwBfrX9eWSo/htJ/jWD5aEZYK5daH4jyXZgDtgNfOnKLzXOXFhzEd5aV1Vv\n",
|
|
|
+ "jnEQJDlQVQfGOYZJ4Vxc4lxc4lxcMsr/TC8rFJK8l8FF5l8Yqv4CcDjJXuA08CBAVZ1Ichg4AVwA\n",
|
|
|
+ "9tWlW5z2AU8C64BnvPNIkibLskKhqv4CeP9ldX/CICiu1P5XgF+5Qv3vAX9z5cOUJK0GP9E8uWbH\n",
|
|
|
+ "PYAJMjvuAUyQ2XEPYILMjnsA70Yr+vDaahisjXlNQZKWK0mN6kKzZwqSpM5QkCR1hoIkqTMUJEmd\n",
|
|
|
+ "oSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTO\n",
|
|
|
+ "UJAkdYaCJKkzFCRJnaEgSeqWFQpJ1if5ZpKXk5xIsj3J7UmOJXklyXNJ1g+135/k1SQnk9w3VL8t\n",
|
|
|
+ "yfG277EbcUCSpGu33DOFx4Bnquoe4CeAk8AjwLGquht4vm2TZCvwELAV2Ak8niTteZ4A9lbVDDCT\n",
|
|
|
+ "ZOfIjkSSdN2WDIUktwEfq6qvAlTVhap6HdgFHGzNDgIPtPL9wKGqOl9Vp4FTwPYkG4Fbq2qutXtq\n",
|
|
|
+ "qI8kaQIs50xhC/DDJF9L8vtJfiPJe4ENVbXQ2iwAG1r5TuDMUP8zwF1XqJ9v9ZKkCbFmmW0+DHy6\n",
|
|
|
+ "qr6f5Iu0paJFVVVJanTDOjBU3tEekiSAJDu4QW+MywmFM8CZqvp+2/4msB84m+SOqjrblobOtf3z\n",
|
|
|
+ "wOah/pvac8y38nD9/JVf8sAyhy9J06eqZoHZxe0kj47quZdcPqqqs8BrSe5uVZ8AXgK+BexpdXuA\n",
|
|
|
+ "I618FHg4ydokW4AZYK49zxvtzqUAu4f6SJImwHLOFAB+Efh6krXAfwV+DrgZOJxkL3AaeBCgqk4k\n",
|
|
|
+ "OQycAC4A+6pqcWlpH/AksI7B3UzPjug4JEkjkEvv15NhcG1inGNacxHeWldVb45xEJK0bEmqqrJ0\n",
|
|
|
+ "y6X5iWZJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5Q\n",
|
|
|
+ "kCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSd2yQiHJ6SR/kOSF\n",
|
|
|
+ "JHOt7vYkx5K8kuS5JOuH2u9P8mqSk0nuG6rfluR42/fY6A9HknQ9lnumUMCOqvpQVd3b6h4BjlXV\n",
|
|
|
+ "3cDzbZskW4GHgK3ATuDxJGl9ngD2VtUMMJNk54iOQ5I0AitZPspl27uAg618EHigle8HDlXV+ao6\n",
|
|
|
+ "DZwCtifZCNxaVXOt3VNDfSRJE2AlZwrfSfKDJL/Q6jZU1UIrLwAbWvlO4MxQ3zPAXVeon2/1kqQJ\n",
|
|
|
+ "sWaZ7f5eVf1xkr8CHEtycnhnVVWSGt2wDgyVd7SHJAkgyQ5u0BvjskKhqv64/fnDJL8N3AssJLmj\n",
|
|
|
+ "qs62paFzrfk8sHmo+yYGZwjzrTxcP3/lVzyw/COQpClTVbPA7OJ2kkdH9dxLLh8leU+SW1v5vcB9\n",
|
|
|
+ "wHHgKLCnNdsDHGnlo8DDSdYm2QLMAHNVdRZ4I8n2duF591AfSdIEWM6Zwgbgt9sNRGuAr1fVc0l+\n",
|
|
|
+ "ABxOshc4DTwIUFUnkhwGTgAXgH1Vtbi0tA94ElgHPFNVz47wWCRJ1ymX3q8nw+DaxDjHtOYivLWu\n",
|
|
|
+ "qt4c4yAkadmSVFVdfofoNfETzZKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTO\n",
|
|
|
+ "UJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJn\n",
|
|
|
+ "KEiSumWFQpKbk7yQ5Ftt+/Ykx5K8kuS5JOuH2u5P8mqSk0nuG6rfluR42/fY6A9FknS9lnum8Fng\n",
|
|
|
+ "BFBt+xHgWFXdDTzftkmyFXgI2ArsBB5PktbnCWBvVc0AM0l2juYQJEmjsmQoJNkEfBL4MrD4Br8L\n",
|
|
|
+ "ONjKB4EHWvl+4FBVna+q08ApYHuSjcCtVTXX2j011EeSNCGWc6bw68AvAReH6jZU1UIrLwAbWvlO\n",
|
|
|
+ "4MxQuzPAXVeon2/1kqQJsuZqO5P8LHCuql5IsuNKbaqqktSV9l27A0PlHe0hSQJo78c7bsRzXzUU\n",
|
|
|
+ "gJ8EdiX5JPDjwF9K8jSwkOSOqjrblobOtfbzwOah/psYnCHMt/Jw/fzbv+yBFRyCJE2XqpoFZhe3\n",
|
|
|
+ "kzw6que+6vJRVX2+qjZX1RbgYeB3qmo3cBTY05rtAY608lHg4SRrk2wBZoC5qjoLvJFke7vwvHuo\n",
|
|
|
+ "jyRpQix1pnC5xWWiLwCHk+wFTgMPAlTViSSHGdypdAHYV1WLffYBTwLrgGeq6tnrG7okadRy6T17\n",
|
|
|
+ "MgyuT4xzTGsuwlvrqurNMQ5CkpYtSVVVlm65ND/RLEnqDAVJUmcoSJI6Q0GS1BkKkqRupbekTov/\n",
|
|
|
+ "e+n3+I3HqO4kkKSVMBTe1jhvizUPJI2Hy0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNB\n",
|
|
|
+ "ktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1F01FJL8eJLvJXkxyYkk/7LV357k\n",
|
|
|
+ "WJJXkjyXZP1Qn/1JXk1yMsl9Q/Xbkhxv+x67cYckSbpWVw2Fqvo/wMer6oPATwAfT/JR4BHgWFXd\n",
|
|
|
+ "DTzftkmyFXgI2ArsBB7Ppe+1fALYW1UzwEySnTfigCRJ127J5aOq+t+tuBa4GfgRsAs42OoPAg+0\n",
|
|
|
+ "8v3Aoao6X1WngVPA9iQbgVuraq61e2qojyRpQiwZCkluSvIisAB8t6peAjZU1UJrsgBsaOU7gTND\n",
|
|
|
+ "3c8Ad12hfr7VS5ImyJqlGlTVReCDSW4Dvp3k45ftryQj/pb7A0PlHe0hSQJIsoMb9Ma4ZCgsqqrX\n",
|
|
|
+ "k/wHYBuwkOSOqjrblobOtWbzwOahbpsYnCHMt/Jw/fzbv9qB5Q5LkqZOVc0Cs4vbSR4d1XMvdffR\n",
|
|
|
+ "+xfvLEqyDvgHwAvAUWBPa7YHONLKR4GHk6xNsgWYAeaq6izwRpLt7cLz7qE+kqQJsdSZwkbgYJKb\n",
|
|
|
+ "GATI01X1fJIXgMNJ9gKngQcBqupEksPACeACsK+qFpeW9gFPAuuAZ6rq2VEfjCTp+uTSe/ZkGFyf\n",
|
|
|
+ "GOeY1lyEt24a7xhCVWXpdpI0eN8c1XuGn2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK6ZX+iWatr\n",
|
|
|
+ "9L86ZOW8LVaaPobCxBp3JpgH0jRy+UiS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpD\n",
|
|
|
+ "QZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVK3ZCgk2Zzku0leSvKHST7T6m9PcizJK0meS7J+\n",
|
|
|
+ "qM/+JK8mOZnkvqH6bUmOt32P3ZhDkiRdq+WcKZwH/llV/Q3gI8A/SXIP8AhwrKruBp5v2yTZCjwE\n",
|
|
|
+ "bAV2Ao8nWfzGlieAvVU1A8wk2TnSo5EkXZclQ6GqzlbVi63858DLwF3ALuBga3YQeKCV7wcOVdX5\n",
|
|
|
+ "qjoNnAK2J9kI3FpVc63dU0N9JEkTYEXXFJJ8APgQ8D1gQ1UttF0LwIZWvhM4M9TtDIMQubx+vtVL\n",
|
|
|
+ "kibEsr+jOcn7gN8CPltVf3ZpRQiqqkb7RfMHhso72kOSBJBkBzfojXFZoZDkFgaB8HRVHWnVC0nu\n",
|
|
|
+ "qKqzbWnoXKufBzYPdd/E4AxhvpWH6+ev/IoHljl8SZo+VTULzC5uJ3l0VM+9nLuPAnwFOFFVXxza\n",
|
|
|
+ "dRTY08p7gCND9Q8nWZtkCzADzFXVWeCNJNvbc+4e6iNJmgCpuvqqT5KPAv8R+ANgsfF+YA44DPxV\n",
|
|
|
+ "4DTwYFX9aevzeeDngQsMlpu+3eq3AU8C64BnquozV3i9uvQy47DmIrx103jHEMb7+oMxVFWWbidp\n",
|
|
|
+ "3JLUqH5elwyF1WYogKEgaSVGGQp+olmS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeqW/WsuNH1G+6tL\n",
|
|
|
+ "ro23xUqry1DQVYw7E8wDabW5fCRJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnq\n",
|
|
|
+ "DAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKlbMhSSfDXJQpLjQ3W3JzmW5JUkzyVZP7Rvf5JXk5xM\n",
|
|
|
+ "ct9Q/bYkx9u+x0Z/KJKk67WcM4WvATsvq3sEOFZVdwPPt22SbAUeAra2Po8nWfymlCeAvVU1A8wk\n",
|
|
|
+ "ufw5JUljtmQoVNXvAj+6rHoXcLCVDwIPtPL9wKGqOl9Vp4FTwPYkG4Fbq2qutXtqqI8kaUJc6zWF\n",
|
|
|
+ "DVW10MoLwIZWvhM4M9TuDHDXFernW70kaYJc93c0V1WN/gveDwyVd7SHptHo/22tTFX5RdGaOEl2\n",
|
|
|
+ "cIPeGK81FBaS3FFVZ9vS0LlWPw9sHmq3icEZwnwrD9fPv/3TH7jGYendZ5yZYB5oMlXVLDC7uJ3k\n",
|
|
|
+ "0VE997UuHx0F9rTyHuDIUP3DSdYm2QLMAHNVdRZ4I8n2duF591AfSdKEWPJMIckh4KeA9yd5DfgX\n",
|
|
|
+ "wBeAw0n2AqeBBwGq6kSSw8AJ4AKwr6oW/6u3D3gSWAc8U1XPjvZQJEnXK5fesyfDYA15nGNacxHe\n",
|
|
|
+ "umn8yxbj/ntxDBCvKegdIUmN6t+qn2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK66/41F9K72bh/\n",
|
|
|
+ "zQb4qza0ugwF6arGnQnmgVaXy0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOj+nIE04P0Cn\n",
|
|
|
+ "1WQoSBNv3JlgHkwTl48kSZ2hIEnqDAVJUmcoSJI6LzRLWtK474Dy7qfVs+qhkGQn8EXgZuDLVfWr\n",
|
|
|
+ "qz0GSSs1zkzI2EMJpieYVnX5KMnNwL8BdgJbgU8luWc1x/DOMTvuAUyQ2XEPYILMjnsAY1JXeHz3\n",
|
|
|
+ "bepvxGN6rPY1hXuBU1V1uqrOA78J3L/KY3iHmB33ACbI7LgHMEFmxz2ACTI77gG8K612KNwFvDa0\n",
|
|
|
+ "fabVSZImwGpfU1jmedhPv35jh3E1b902vteWNKmm5bpGqlbvOJN8BDhQVTvb9n7g4vDF5kmYeEl6\n",
|
|
|
+ "pxlVYKx2KKwB/gvw94E/AuaAT1XVy6s2CEnS21rV5aOqupDk08C3GdyS+hUDQZImx6qeKUiSJtvE\n",
|
|
|
+ "/JqLJDuTnEzyapLPjXs8N1qSzUm+m+SlJH+Y5DOt/vYkx5K8kuS5JOuH+uxv83MyyX3jG/3oJbk5\n",
|
|
|
+ "yQtJvtW2p3IeAJKsT/LNJC8nOZFk+zTORzuul5IcT/KNJD82TfOQ5KtJFpIcH6pb8fEn2dbm8NUk\n",
|
|
|
+ "jy35wlU19geDpaRTwAeAW4AXgXvGPa4bfMx3AB9s5fcxuNZyD/BrwC+3+s8BX2jlrW1ebmnzdAq4\n",
|
|
|
+ "adzHMcL5+OfA14GjbXsq56Ed40Hg51t5DXDbtM1HO5b/BvxY2/63wJ5pmgfgY8CHgONDdSs5/sWV\n",
|
|
|
+ "oDng3lZ+Bth5tdedlDOFqftQW1WdraoXW/nPgZcZfGZjF4M3BdqfD7Ty/cChqjpfVacZ/KXfu6qD\n",
|
|
|
+ "vkGSbAI+CXyZS9/oMnXzAJDkNuBjVfVVGFyHq6rXmb75eAM4D7yn3aDyHgY3p0zNPFTV7wI/uqx6\n",
|
|
|
+ "Jce/PclG4Naqmmvtnhrqc0WTEgpT/aG2JB9g8D+C7wEbqmqh7VoANrTynQzmZdG7aY5+Hfgl4OJQ\n",
|
|
|
+ "3TTOA8AW4IdJvpbk95P8RpL3MmXzUVV/Avwr4H8wCIM/rapjTNk8XMFKj//y+nmWmJdJCYWpvdqd\n",
|
|
|
+ "5H3AbwGfrao/G95Xg/O9q83NO37ekvwscK6qXuBtvvdxGuZhyBrgw8DjVfVh4C+AR4YbTMN8JPlr\n",
|
|
|
+ "wD9lsBRyJ/C+JP94uM00zMPVLOP4r8mkhMI8sHloezP/f7q9KyW5hUEgPF1VR1r1QpI72v6NwLlW\n",
|
|
|
+ "f/kcbWp173Q/CexK8t+BQ8BPJ3ma6ZuHRWeAM1X1/bb9TQYhcXbK5uNvA/+pqv5XVV0A/j3wd5m+\n",
|
|
|
+ "ebjcSn4uzrT6TZfVX3VeJiUUfgDMJPlAkrXAQ8DRMY/phkoS4CvAiar64tCuowwuqNH+PDJU/3CS\n",
|
|
|
+ "tUm2ADMMLiC9o1XV56tqc1VtAR4GfqeqdjNl87Coqs4CryW5u1V9AngJ+BbTNR8ngY8kWdd+Vj4B\n",
|
|
|
+ "nGD65uFyK/q5aP+e3mh3sAXYPdTnysZ9hX3oqvrPMLgD5xSwf9zjWYXj/SiDNfQXgRfaYydwO/Ad\n",
|
|
|
+ "4BXgOWD9UJ/Pt/k5CfzDcR/DDZiTn+LS3UfTPA9/C/g+8J8Z/A/5tmmcD+CXGQTicQYXVW+Zpnlg\n",
|
|
|
+ "cOb8R8CbDK65/ty1HD+wrc3hKeBLS72uH16TJHWTsnwkSZoAhoIkqTMUJEmdoSBJ6gwFSVJnKEiS\n",
|
|
|
+ "OkNBktQZCpKk7v8BIgy2anPl5soAAAAASUVORK5CYII=\n"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ "<matplotlib.figure.Figure at 0x10790ef28>"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "display_data"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "%matplotlib inline\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Label the figure first so it can be read on its own; plt.hist stays the last\n",
+ "# expression, so the cell still displays its (counts, bin edges, patches) result.\n",
+ "plt.title(\"Distribution of cnt\")\n",
+ "plt.xlabel(\"cnt\")\n",
+ "plt.ylabel(\"Number of rows\")\n",
+ "plt.hist(bike_rentals[\"cnt\"])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 11,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "instant 0.278379\n",
|
|
|
+ "season 0.178056\n",
|
|
|
+ "yr 0.250495\n",
|
|
|
+ "mnth 0.120638\n",
|
|
|
+ "hr 0.394071\n",
|
|
|
+ "holiday -0.030927\n",
|
|
|
+ "weekday 0.026900\n",
|
|
|
+ "workingday 0.030284\n",
|
|
|
+ "weathersit -0.142426\n",
|
|
|
+ "temp 0.404772\n",
|
|
|
+ "atemp 0.400929\n",
|
|
|
+ "hum -0.322911\n",
|
|
|
+ "windspeed 0.093234\n",
|
|
|
+ "casual 0.694564\n",
|
|
|
+ "registered 0.972151\n",
|
|
|
+ "cnt 1.000000\n",
|
|
|
+ "Name: cnt, dtype: float64"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 11,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Restrict to numeric columns explicitly (dteday is not numeric): newer pandas\n",
+ "# no longer drops non-numeric columns inside corr() and raises instead.\n",
+ "bike_rentals.select_dtypes(include=[\"number\"]).corr()[\"cnt\"]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 12,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": true
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def assign_label(hour):\n",
+ "    \"\"\"Bucket an hour of the day into a coarse time-of-day label.\n",
+ "\n",
+ "    Returns 4 for 0 <= hour < 6, 1 for 6 <= hour < 12, 2 for\n",
+ "    12 <= hour < 18 and 3 for 18 <= hour <= 24. Any value outside\n",
+ "    those ranges matches no branch and yields None.\n",
+ "    \"\"\"\n",
+ "    if 0 <= hour < 6:\n",
+ "        return 4\n",
+ "    if 6 <= hour < 12:\n",
+ "        return 1\n",
+ "    if 12 <= hour < 18:\n",
+ "        return 2\n",
+ "    if 18 <= hour <= 24:\n",
+ "        return 3\n",
+ "    return None\n",
|
|
|
+ "\n",
|
|
|
+ "# Derive the time-of-day feature from the hr column, one label per row.\n",
+ "bike_rentals[\"time_label\"] = bike_rentals[\"hr\"].map(assign_label)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## Error metric\n",
|
|
|
+ "\n",
|
|
|
+ "Mean squared error (MSE) is a sensible metric for evaluating our predictions: the target, `cnt`, is a numeric count, and MSE is designed for continuous numeric targets like this one."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 13,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Fixed seed so the 80% training sample, and every error computed from it, is reproducible.\n",
+ "train = bike_rentals.sample(frac=.8, random_state=1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 14,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Hold out every row whose index label was not sampled into train.\n",
+ "test = bike_rentals.drop(train.index)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 18,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "LinearRegression(copy_X=True, fit_intercept=True, normalize=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 18,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "from sklearn.linear_model import LinearRegression\n",
+ "\n",
+ "# Every column except the target (cnt) and casual, registered and dteday is used as a feature.\n",
+ "excluded = {\"cnt\", \"casual\", \"registered\", \"dteday\"}\n",
+ "predictors = [column for column in train.columns if column not in excluded]\n",
+ "\n",
+ "reg = LinearRegression()\n",
+ "reg.fit(train[predictors], train[\"cnt\"])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 19,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "16586.154698429491"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 19,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "import numpy\n",
+ "\n",
+ "predictions = reg.predict(test[predictors])\n",
+ "\n",
+ "# Mean squared error between the predictions and the cnt column of test.\n",
+ "residuals = predictions - test[\"cnt\"]\n",
+ "numpy.mean(residuals ** 2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 20,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "array([ -75.31906346, 144.15652539, 125.29713548, ..., 167.94469909,\n",
|
|
|
+ " 181.44415684, 165.3047817 ])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 20,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Show the model's predictions (the name previously used here is not defined in any cell).\n",
+ "predictions"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 21,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "4 1\n",
|
|
|
+ "10 36\n",
|
|
|
+ "16 93\n",
|
|
|
+ "24 17\n",
|
|
|
+ "36 75\n",
|
|
|
+ "39 76\n",
|
|
|
+ "40 65\n",
|
|
|
+ "45 9\n",
|
|
|
+ "48 2\n",
|
|
|
+ "52 64\n",
|
|
|
+ "68 12\n",
|
|
|
+ "72 2\n",
|
|
|
+ "76 179\n",
|
|
|
+ "80 78\n",
|
|
|
+ "81 97\n",
|
|
|
+ "87 112\n",
|
|
|
+ "88 54\n",
|
|
|
+ "90 35\n",
|
|
|
+ "92 6\n",
|
|
|
+ "109 169\n",
|
|
|
+ "111 89\n",
|
|
|
+ "112 43\n",
|
|
|
+ "113 42\n",
|
|
|
+ "115 11\n",
|
|
|
+ "122 219\n",
|
|
|
+ "133 112\n",
|
|
|
+ "138 17\n",
|
|
|
+ "144 84\n",
|
|
|
+ "146 134\n",
|
|
|
+ "147 63\n",
|
|
|
+ " ... \n",
|
|
|
+ "17232 34\n",
|
|
|
+ "17243 31\n",
|
|
|
+ "17245 8\n",
|
|
|
+ "17255 32\n",
|
|
|
+ "17265 45\n",
|
|
|
+ "17269 75\n",
|
|
|
+ "17280 63\n",
|
|
|
+ "17289 51\n",
|
|
|
+ "17291 239\n",
|
|
|
+ "17292 191\n",
|
|
|
+ "17298 225\n",
|
|
|
+ "17301 213\n",
|
|
|
+ "17302 128\n",
|
|
|
+ "17304 92\n",
|
|
|
+ "17309 19\n",
|
|
|
+ "17311 3\n",
|
|
|
+ "17312 3\n",
|
|
|
+ "17315 44\n",
|
|
|
+ "17316 49\n",
|
|
|
+ "17327 66\n",
|
|
|
+ "17339 33\n",
|
|
|
+ "17340 74\n",
|
|
|
+ "17343 144\n",
|
|
|
+ "17346 138\n",
|
|
|
+ "17348 123\n",
|
|
|
+ "17349 125\n",
|
|
|
+ "17351 72\n",
|
|
|
+ "17353 36\n",
|
|
|
+ "17354 49\n",
|
|
|
+ "17373 122\n",
|
|
|
+ "Name: cnt, dtype: int64"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 21,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# The cnt column of the test rows.\n",
+ "test.loc[:, \"cnt\"]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## Error\n",
|
|
|
+ "\n",
|
|
|
+ "The error is very high. This may be because the data contains a few extremely high rental counts but mostly low ones: MSE squares each error, so large misses on those few rows are penalized heavily and dominate the total."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 25,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "DecisionTreeRegressor(compute_importances=None, criterion='mse',\n",
|
|
|
+ " max_depth=None, max_features=None, max_leaf_nodes=None,\n",
|
|
|
+ " min_density=None, min_samples_leaf=5, min_samples_split=2,\n",
|
|
|
+ " random_state=None, splitter='best')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 25,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "from sklearn.tree import DecisionTreeRegressor\n",
+ "\n",
+ "# Fixed seed: the tree breaks ties between equally good splits at random,\n",
+ "# so without it the fitted tree can differ from run to run.\n",
+ "reg = DecisionTreeRegressor(min_samples_leaf=5, random_state=1)\n",
+ "\n",
+ "reg.fit(train[predictors], train[\"cnt\"])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 26,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "2644.2820429330714"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 26,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "predictions = reg.predict(test[predictors])\n",
+ "\n",
+ "# Mean squared error between the predictions and the cnt column of test.\n",
+ "residuals = predictions - test[\"cnt\"]\n",
+ "numpy.mean(residuals ** 2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 28,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "2964.7288070579207"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 28,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Same model with a smaller minimum leaf size. Fixed seed: the tree breaks ties\n",
+ "# between equally good splits at random, so without it the result can vary per run.\n",
+ "reg = DecisionTreeRegressor(min_samples_leaf=2, random_state=1)\n",
+ "\n",
+ "reg.fit(train[predictors], train[\"cnt\"])\n",
+ "\n",
+ "predictions = reg.predict(test[predictors])\n",
+ "\n",
+ "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## Decision tree error\n",
|
|
|
+ "\n",
|
|
|
+ "Because it can capture nonlinear relationships between the predictors and `cnt`, the decision tree regressor appears to have a much lower error than linear regression."
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 30,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "RandomForestRegressor(bootstrap=True, compute_importances=None,\n",
|
|
|
+ " criterion='mse', max_depth=None, max_features='auto',\n",
|
|
|
+ " max_leaf_nodes=None, min_density=None, min_samples_leaf=5,\n",
|
|
|
+ " min_samples_split=2, n_estimators=10, n_jobs=1, oob_score=False,\n",
|
|
|
+ " random_state=None, verbose=0)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 30,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "\n",
+ "# The forest is fitted with random bootstrapping; fix the seed so the model\n",
+ "# and the error reported for it are reproducible.\n",
+ "reg = RandomForestRegressor(min_samples_leaf=5, random_state=1)\n",
+ "reg.fit(train[predictors], train[\"cnt\"])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 31,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "1911.9827104170736"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 31,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "predictions = reg.predict(test[predictors])\n",
+ "\n",
+ "# Mean squared error between the predictions and the cnt column of test.\n",
+ "residuals = predictions - test[\"cnt\"]\n",
+ "numpy.mean(residuals ** 2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## Random forest error\n",
|
|
|
+ "\n",
|
|
|
+ "By averaging many trees, the random forest removes some of the sources of overfitting, so its error is lower than that of a single decision tree."
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.4.2"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 0
|
|
|
+}
|