Queer European MD passionate about IT
Selaa lähdekoodia

Add in projects

Vik Paruchuri 9 vuotta sitten
vanhempi
sitoutus
ad014501fe
1 muutettua tiedostoa jossa 740 lisäystä ja 0 poistoa
  1. 740 0
      Mission213Solution.ipynb

+ 740 - 0
Mission213Solution.ipynb

@@ -0,0 +1,740 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>instant</th>\n",
+       "      <th>dteday</th>\n",
+       "      <th>season</th>\n",
+       "      <th>yr</th>\n",
+       "      <th>mnth</th>\n",
+       "      <th>hr</th>\n",
+       "      <th>holiday</th>\n",
+       "      <th>weekday</th>\n",
+       "      <th>workingday</th>\n",
+       "      <th>weathersit</th>\n",
+       "      <th>temp</th>\n",
+       "      <th>atemp</th>\n",
+       "      <th>hum</th>\n",
+       "      <th>windspeed</th>\n",
+       "      <th>casual</th>\n",
+       "      <th>registered</th>\n",
+       "      <th>cnt</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.24</td>\n",
+       "      <td>0.2879</td>\n",
+       "      <td>0.81</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>13</td>\n",
+       "      <td>16</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.22</td>\n",
+       "      <td>0.2727</td>\n",
+       "      <td>0.80</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>32</td>\n",
+       "      <td>40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.22</td>\n",
+       "      <td>0.2727</td>\n",
+       "      <td>0.80</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>27</td>\n",
+       "      <td>32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.24</td>\n",
+       "      <td>0.2879</td>\n",
+       "      <td>0.75</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>10</td>\n",
+       "      <td>13</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.24</td>\n",
+       "      <td>0.2879</td>\n",
+       "      <td>0.75</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   instant      dteday  season  yr  mnth  hr  holiday  weekday  workingday  \\\n",
+       "0        1  2011-01-01       1   0     1   0        0        6           0   \n",
+       "1        2  2011-01-01       1   0     1   1        0        6           0   \n",
+       "2        3  2011-01-01       1   0     1   2        0        6           0   \n",
+       "3        4  2011-01-01       1   0     1   3        0        6           0   \n",
+       "4        5  2011-01-01       1   0     1   4        0        6           0   \n",
+       "\n",
+       "   weathersit  temp   atemp   hum  windspeed  casual  registered  cnt  \n",
+       "0           1  0.24  0.2879  0.81          0       3          13   16  \n",
+       "1           1  0.22  0.2727  0.80          0       8          32   40  \n",
+       "2           1  0.22  0.2727  0.80          0       5          27   32  \n",
+       "3           1  0.24  0.2879  0.75          0       3          10   13  \n",
+       "4           1  0.24  0.2879  0.75          0       0           1    1  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas\n",
+    "\n",
+    "bike_rentals = pandas.read_csv(\"bike_rental_hour.csv\")\n",
+    "bike_rentals.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([ 6972.,  3705.,  2659.,  1660.,   987.,   663.,   369.,   188.,\n",
+       "          139.,    37.]),\n",
+       " array([   1. ,   98.6,  196.2,  293.8,  391.4,  489. ,  586.6,  684.2,\n",
+       "         781.8,  879.4,  977. ]),\n",
+       " <a list of 10 Patch objects>)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": [
+       "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
+       "AAALEgAACxIB0t1+/AAAFD5JREFUeJzt3V+MnNd93vHvI1FMaFsVK7igKJGFiWJViEVT22xFN7UR\n",
+       "OnVVxkgpXUkyUIJIiNywjt0WSCz6ouJV6gRoYxmFdBH/ESXYTFmnIWhUkEUrXjRAAa+dSA0jipXY\n",
+       "lqi4CZdu40hJilak+OvFnD2cEhR3lxzujDTfDzDgec97zsx5D7nz8D3vOzupKiRJArhp3AOQJE0O\n",
+       "Q0GS1BkKkqTOUJAkdYaCJKkzFCRJ3ZKhkOSvJ3lh6PF6ks8kuT3JsSSvJHkuyfqhPvuTvJrkZJL7\n",
+       "huq3JTne9j12ow5KknRtspLPKSS5CZgH7gV+EfifVfVrST4H/OWqeiTJVuAbwN8B7gK+A8xUVSWZ\n",
+       "Az5dVXNJngG+VFXPjviYJEnXaKXLR58ATlXVa8Au4GCrPwg80Mr3A4eq6nxVnQZOAduTbARuraq5\n",
+       "1u6poT6SpAmw0lB4GDjUyhuqaqGVF4ANrXwncGaozxkGZwyX18+3eknShFh2KCRZC/wj4N9dvq8G\n",
+       "a1D+vgxJeodbs4K2PwP8XlX9sG0vJLmjqs62paFzrX4e2DzUbxODM4T5Vh6un7/8RZIYLpK0QlWV\n",
+       "UTzPSkLhU1xaOgI4CuwBfrX9eWSo/htJ/jWD5aEZYK5daH4jyXZgDtgNfOnKLzXOXFhzEd5aV1Vv\n",
+       "jnEQJDlQVQfGOYZJ4Vxc4lxc4lxcMsr/TC8rFJK8l8FF5l8Yqv4CcDjJXuA08CBAVZ1Ichg4AVwA\n",
+       "9tWlW5z2AU8C64BnvPNIkibLskKhqv4CeP9ldX/CICiu1P5XgF+5Qv3vAX9z5cOUJK0GP9E8uWbH\n",
+       "PYAJMjvuAUyQ2XEPYILMjnsA70Yr+vDaahisjXlNQZKWK0mN6kKzZwqSpM5QkCR1hoIkqTMUJEmd\n",
+       "oSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTO\n",
+       "UJAkdYaCJKkzFCRJnaEgSeqWFQpJ1if5ZpKXk5xIsj3J7UmOJXklyXNJ1g+135/k1SQnk9w3VL8t\n",
+       "yfG277EbcUCSpGu33DOFx4Bnquoe4CeAk8AjwLGquht4vm2TZCvwELAV2Ak8niTteZ4A9lbVDDCT\n",
+       "ZOfIjkSSdN2WDIUktwEfq6qvAlTVhap6HdgFHGzNDgIPtPL9wKGqOl9Vp4FTwPYkG4Fbq2qutXtq\n",
+       "qI8kaQIs50xhC/DDJF9L8vtJfiPJe4ENVbXQ2iwAG1r5TuDMUP8zwF1XqJ9v9ZKkCbFmmW0+DHy6\n",
+       "qr6f5Iu0paJFVVVJanTDOjBU3tEekiSAJDu4QW+MywmFM8CZqvp+2/4msB84m+SOqjrblobOtf3z\n",
+       "wOah/pvac8y38nD9/JVf8sAyhy9J06eqZoHZxe0kj47quZdcPqqqs8BrSe5uVZ8AXgK+BexpdXuA\n",
+       "I618FHg4ydokW4AZYK49zxvtzqUAu4f6SJImwHLOFAB+Efh6krXAfwV+DrgZOJxkL3AaeBCgqk4k\n",
+       "OQycAC4A+6pqcWlpH/AksI7B3UzPjug4JEkjkEvv15NhcG1inGNacxHeWldVb45xEJK0bEmqqrJ0\n",
+       "y6X5iWZJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5Q\n",
+       "kCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSd2yQiHJ6SR/kOSF\n",
+       "JHOt7vYkx5K8kuS5JOuH2u9P8mqSk0nuG6rfluR42/fY6A9HknQ9lnumUMCOqvpQVd3b6h4BjlXV\n",
+       "3cDzbZskW4GHgK3ATuDxJGl9ngD2VtUMMJNk54iOQ5I0AitZPspl27uAg618EHigle8HDlXV+ao6\n",
+       "DZwCtifZCNxaVXOt3VNDfSRJE2AlZwrfSfKDJL/Q6jZU1UIrLwAbWvlO4MxQ3zPAXVeon2/1kqQJ\n",
+       "sWaZ7f5eVf1xkr8CHEtycnhnVVWSGt2wDgyVd7SHJAkgyQ5u0BvjskKhqv64/fnDJL8N3AssJLmj\n",
+       "qs62paFzrfk8sHmo+yYGZwjzrTxcP3/lVzyw/COQpClTVbPA7OJ2kkdH9dxLLh8leU+SW1v5vcB9\n",
+       "wHHgKLCnNdsDHGnlo8DDSdYm2QLMAHNVdRZ4I8n2duF591AfSdIEWM6Zwgbgt9sNRGuAr1fVc0l+\n",
+       "ABxOshc4DTwIUFUnkhwGTgAXgH1Vtbi0tA94ElgHPFNVz47wWCRJ1ymX3q8nw+DaxDjHtOYivLWu\n",
+       "qt4c4yAkadmSVFVdfofoNfETzZKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTO\n",
+       "UJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJn\n",
+       "KEiSumWFQpKbk7yQ5Ftt+/Ykx5K8kuS5JOuH2u5P8mqSk0nuG6rfluR42/fY6A9FknS9lnum8Fng\n",
+       "BFBt+xHgWFXdDTzftkmyFXgI2ArsBB5PktbnCWBvVc0AM0l2juYQJEmjsmQoJNkEfBL4MrD4Br8L\n",
+       "ONjKB4EHWvl+4FBVna+q08ApYHuSjcCtVTXX2j011EeSNCGWc6bw68AvAReH6jZU1UIrLwAbWvlO\n",
+       "4MxQuzPAXVeon2/1kqQJsuZqO5P8LHCuql5IsuNKbaqqktSV9l27A0PlHe0hSQJo78c7bsRzXzUU\n",
+       "gJ8EdiX5JPDjwF9K8jSwkOSOqjrblobOtfbzwOah/psYnCHMt/Jw/fzbv+yBFRyCJE2XqpoFZhe3\n",
+       "kzw6que+6vJRVX2+qjZX1RbgYeB3qmo3cBTY05rtAY608lHg4SRrk2wBZoC5qjoLvJFke7vwvHuo\n",
+       "jyRpQix1pnC5xWWiLwCHk+wFTgMPAlTViSSHGdypdAHYV1WLffYBTwLrgGeq6tnrG7okadRy6T17\n",
+       "MgyuT4xzTGsuwlvrqurNMQ5CkpYtSVVVlm65ND/RLEnqDAVJUmcoSJI6Q0GS1BkKkqRupbekTov/\n",
+       "e+n3+I3HqO4kkKSVMBTe1jhvizUPJI2Hy0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNB\n",
+       "ktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1F01FJL8eJLvJXkxyYkk/7LV357k\n",
+       "WJJXkjyXZP1Qn/1JXk1yMsl9Q/Xbkhxv+x67cYckSbpWVw2Fqvo/wMer6oPATwAfT/JR4BHgWFXd\n",
+       "DTzftkmyFXgI2ArsBB7Ppe+1fALYW1UzwEySnTfigCRJ127J5aOq+t+tuBa4GfgRsAs42OoPAg+0\n",
+       "8v3Aoao6X1WngVPA9iQbgVuraq61e2qojyRpQiwZCkluSvIisAB8t6peAjZU1UJrsgBsaOU7gTND\n",
+       "3c8Ad12hfr7VS5ImyJqlGlTVReCDSW4Dvp3k45ftryQj/pb7A0PlHe0hSQJIsoMb9Ma4ZCgsqqrX\n",
+       "k/wHYBuwkOSOqjrblobOtWbzwOahbpsYnCHMt/Jw/fzbv9qB5Q5LkqZOVc0Cs4vbSR4d1XMvdffR\n",
+       "+xfvLEqyDvgHwAvAUWBPa7YHONLKR4GHk6xNsgWYAeaq6izwRpLt7cLz7qE+kqQJsdSZwkbgYJKb\n",
+       "GATI01X1fJIXgMNJ9gKngQcBqupEksPACeACsK+qFpeW9gFPAuuAZ6rq2VEfjCTp+uTSe/ZkGFyf\n",
+       "GOeY1lyEt24a7xhCVWXpdpI0eN8c1XuGn2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK6ZX+iWatr\n",
+       "9L86ZOW8LVaaPobCxBp3JpgH0jRy+UiS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpD\n",
+       "QZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVK3ZCgk2Zzku0leSvKHST7T6m9PcizJK0meS7J+\n",
+       "qM/+JK8mOZnkvqH6bUmOt32P3ZhDkiRdq+WcKZwH/llV/Q3gI8A/SXIP8AhwrKruBp5v2yTZCjwE\n",
+       "bAV2Ao8nWfzGlieAvVU1A8wk2TnSo5EkXZclQ6GqzlbVi63858DLwF3ALuBga3YQeKCV7wcOVdX5\n",
+       "qjoNnAK2J9kI3FpVc63dU0N9JEkTYEXXFJJ8APgQ8D1gQ1UttF0LwIZWvhM4M9TtDIMQubx+vtVL\n",
+       "kibEsr+jOcn7gN8CPltVf3ZpRQiqqkb7RfMHhso72kOSBJBkBzfojXFZoZDkFgaB8HRVHWnVC0nu\n",
+       "qKqzbWnoXKufBzYPdd/E4AxhvpWH6+ev/IoHljl8SZo+VTULzC5uJ3l0VM+9nLuPAnwFOFFVXxza\n",
+       "dRTY08p7gCND9Q8nWZtkCzADzFXVWeCNJNvbc+4e6iNJmgCpuvqqT5KPAv8R+ANgsfF+YA44DPxV\n",
+       "4DTwYFX9aevzeeDngQsMlpu+3eq3AU8C64BnquozV3i9uvQy47DmIrx103jHEMb7+oMxVFWWbidp\n",
+       "3JLUqH5elwyF1WYogKEgaSVGGQp+olmS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeqW/WsuNH1G+6tL\n",
+       "ro23xUqry1DQVYw7E8wDabW5fCRJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnq\n",
+       "DAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKlbMhSSfDXJQpLjQ3W3JzmW5JUkzyVZP7Rvf5JXk5xM\n",
+       "ct9Q/bYkx9u+x0Z/KJKk67WcM4WvATsvq3sEOFZVdwPPt22SbAUeAra2Po8nWfymlCeAvVU1A8wk\n",
+       "ufw5JUljtmQoVNXvAj+6rHoXcLCVDwIPtPL9wKGqOl9Vp4FTwPYkG4Fbq2qutXtqqI8kaUJc6zWF\n",
+       "DVW10MoLwIZWvhM4M9TuDHDXFernW70kaYJc93c0V1WN/gveDwyVd7SHptHo/22tTFX5RdGaOEl2\n",
+       "cIPeGK81FBaS3FFVZ9vS0LlWPw9sHmq3icEZwnwrD9fPv/3TH7jGYendZ5yZYB5oMlXVLDC7uJ3k\n",
+       "0VE997UuHx0F9rTyHuDIUP3DSdYm2QLMAHNVdRZ4I8n2duF591AfSdKEWPJMIckh4KeA9yd5DfgX\n",
+       "wBeAw0n2AqeBBwGq6kSSw8AJ4AKwr6oW/6u3D3gSWAc8U1XPjvZQJEnXK5fesyfDYA15nGNacxHe\n",
+       "umn8yxbj/ntxDBCvKegdIUmN6t+qn2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK66/41F9K72bh/\n",
+       "zQb4qza0ugwF6arGnQnmgVaXy0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOj+nIE04P0Cn\n",
+       "1WQoSBNv3JlgHkwTl48kSZ2hIEnqDAVJUmcoSJI6LzRLWtK474Dy7qfVs+qhkGQn8EXgZuDLVfWr\n",
+       "qz0GSSs1zkzI2EMJpieYVnX5KMnNwL8BdgJbgU8luWc1x/DOMTvuAUyQ2XEPYILMjnsAY1JXeHz3\n",
+       "bepvxGN6rPY1hXuBU1V1uqrOA78J3L/KY3iHmB33ACbI7LgHMEFmxz2ACTI77gG8K612KNwFvDa0\n",
+       "fabVSZImwGpfU1jmedhPv35jh3E1b902vteWNKmm5bpGqlbvOJN8BDhQVTvb9n7g4vDF5kmYeEl6\n",
+       "pxlVYKx2KKwB/gvw94E/AuaAT1XVy6s2CEnS21rV5aOqupDk08C3GdyS+hUDQZImx6qeKUiSJtvE\n",
+       "/JqLJDuTnEzyapLPjXs8N1qSzUm+m+SlJH+Y5DOt/vYkx5K8kuS5JOuH+uxv83MyyX3jG/3oJbk5\n",
+       "yQtJvtW2p3IeAJKsT/LNJC8nOZFk+zTORzuul5IcT/KNJD82TfOQ5KtJFpIcH6pb8fEn2dbm8NUk\n",
+       "jy35wlU19geDpaRTwAeAW4AXgXvGPa4bfMx3AB9s5fcxuNZyD/BrwC+3+s8BX2jlrW1ebmnzdAq4\n",
+       "adzHMcL5+OfA14GjbXsq56Ed40Hg51t5DXDbtM1HO5b/BvxY2/63wJ5pmgfgY8CHgONDdSs5/sWV\n",
+       "oDng3lZ+Bth5tdedlDOFqftQW1WdraoXW/nPgZcZfGZjF4M3BdqfD7Ty/cChqjpfVacZ/KXfu6qD\n",
+       "vkGSbAI+CXyZS9/oMnXzAJDkNuBjVfVVGFyHq6rXmb75eAM4D7yn3aDyHgY3p0zNPFTV7wI/uqx6\n",
+       "Jce/PclG4Naqmmvtnhrqc0WTEgpT/aG2JB9g8D+C7wEbqmqh7VoANrTynQzmZdG7aY5+Hfgl4OJQ\n",
+       "3TTOA8AW4IdJvpbk95P8RpL3MmXzUVV/Avwr4H8wCIM/rapjTNk8XMFKj//y+nmWmJdJCYWpvdqd\n",
+       "5H3AbwGfrao/G95Xg/O9q83NO37ekvwscK6qXuBtvvdxGuZhyBrgw8DjVfVh4C+AR4YbTMN8JPlr\n",
+       "wD9lsBRyJ/C+JP94uM00zMPVLOP4r8mkhMI8sHloezP/f7q9KyW5hUEgPF1VR1r1QpI72v6NwLlW\n",
+       "f/kcbWp173Q/CexK8t+BQ8BPJ3ma6ZuHRWeAM1X1/bb9TQYhcXbK5uNvA/+pqv5XVV0A/j3wd5m+\n",
+       "ebjcSn4uzrT6TZfVX3VeJiUUfgDMJPlAkrXAQ8DRMY/phkoS4CvAiar64tCuowwuqNH+PDJU/3CS\n",
+       "tUm2ADMMLiC9o1XV56tqc1VtAR4GfqeqdjNl87Coqs4CryW5u1V9AngJ+BbTNR8ngY8kWdd+Vj4B\n",
+       "nGD65uFyK/q5aP+e3mh3sAXYPdTnysZ9hX3oqvrPMLgD5xSwf9zjWYXj/SiDNfQXgRfaYydwO/Ad\n",
+       "4BXgOWD9UJ/Pt/k5CfzDcR/DDZiTn+LS3UfTPA9/C/g+8J8Z/A/5tmmcD+CXGQTicQYXVW+Zpnlg\n",
+       "cOb8R8CbDK65/ty1HD+wrc3hKeBLS72uH16TJHWTsnwkSZoAhoIkqTMUJEmdoSBJ6gwFSVJnKEiS\n",
+       "OkNBktQZCpKk7v8BIgy2anPl5soAAAAASUVORK5CYII=\n"
+      ],
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x10790ef28>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "%matplotlib inline\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.hist(bike_rentals[\"cnt\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "instant       0.278379\n",
+       "season        0.178056\n",
+       "yr            0.250495\n",
+       "mnth          0.120638\n",
+       "hr            0.394071\n",
+       "holiday      -0.030927\n",
+       "weekday       0.026900\n",
+       "workingday    0.030284\n",
+       "weathersit   -0.142426\n",
+       "temp          0.404772\n",
+       "atemp         0.400929\n",
+       "hum          -0.322911\n",
+       "windspeed     0.093234\n",
+       "casual        0.694564\n",
+       "registered    0.972151\n",
+       "cnt           1.000000\n",
+       "Name: cnt, dtype: float64"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Correlate only the numeric columns with cnt: `dteday` is read as a string\n",
+    "# column, and DataFrame.corr() raises on non-numeric data in pandas >= 2.0.\n",
+    "bike_rentals.select_dtypes(include=[\"number\"]).corr()[\"cnt\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def assign_label(hour):\n",
+    "    \"\"\"Map an hour of the day to a time-of-day bucket.\n",
+    "\n",
+    "    Returns 4 for 0 <= hour < 6, 1 for 6 <= hour < 12, 2 for\n",
+    "    12 <= hour < 18 and 3 for 18 <= hour <= 24. Any other value\n",
+    "    matches no bucket and yields None.\n",
+    "    \"\"\"\n",
+    "    if 0 <= hour < 6:\n",
+    "        return 4\n",
+    "    if 6 <= hour < 12:\n",
+    "        return 1\n",
+    "    if 12 <= hour < 18:\n",
+    "        return 2\n",
+    "    if 18 <= hour <= 24:\n",
+    "        return 3\n",
+    "    return None\n",
+    "\n",
+    "bike_rentals[\"time_label\"] = bike_rentals[\"hr\"].apply(assign_label)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Error metric\n",
+    "\n",
+    "Mean squared error (MSE) is the most sensible metric for evaluating our predictions. MSE works on continuous numeric data, which fits the rental counts we are predicting quite well."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Use 80% of the rows for training. The fixed seed keeps the train/test split,\n",
+    "# and therefore every error figure computed from it, the same on each run.\n",
+    "train = bike_rentals.sample(frac=.8, random_state=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "test = bike_rentals.loc[~bike_rentals.index.isin(train.index)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, normalize=False)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "\n",
+    "predictors = list(train.columns)\n",
+    "predictors.remove(\"cnt\")\n",
+    "predictors.remove(\"casual\")\n",
+    "predictors.remove(\"registered\")\n",
+    "predictors.remove(\"dteday\")\n",
+    "\n",
+    "reg = LinearRegression()\n",
+    "\n",
+    "reg.fit(train[predictors], train[\"cnt\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "16586.154698429491"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import numpy\n",
+    "predictions = reg.predict(test[predictors])\n",
+    "\n",
+    "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ -75.31906346,  144.15652539,  125.29713548, ...,  167.94469909,\n",
+       "        181.44415684,  165.3047817 ])"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Inspect the raw linear regression predictions. The original cell referenced\n",
+    "# `actual`, a name never defined in this notebook, which fails on a fresh kernel.\n",
+    "predictions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4          1\n",
+       "10        36\n",
+       "16        93\n",
+       "24        17\n",
+       "36        75\n",
+       "39        76\n",
+       "40        65\n",
+       "45         9\n",
+       "48         2\n",
+       "52        64\n",
+       "68        12\n",
+       "72         2\n",
+       "76       179\n",
+       "80        78\n",
+       "81        97\n",
+       "87       112\n",
+       "88        54\n",
+       "90        35\n",
+       "92         6\n",
+       "109      169\n",
+       "111       89\n",
+       "112       43\n",
+       "113       42\n",
+       "115       11\n",
+       "122      219\n",
+       "133      112\n",
+       "138       17\n",
+       "144       84\n",
+       "146      134\n",
+       "147       63\n",
+       "        ... \n",
+       "17232     34\n",
+       "17243     31\n",
+       "17245      8\n",
+       "17255     32\n",
+       "17265     45\n",
+       "17269     75\n",
+       "17280     63\n",
+       "17289     51\n",
+       "17291    239\n",
+       "17292    191\n",
+       "17298    225\n",
+       "17301    213\n",
+       "17302    128\n",
+       "17304     92\n",
+       "17309     19\n",
+       "17311      3\n",
+       "17312      3\n",
+       "17315     44\n",
+       "17316     49\n",
+       "17327     66\n",
+       "17339     33\n",
+       "17340     74\n",
+       "17343    144\n",
+       "17346    138\n",
+       "17348    123\n",
+       "17349    125\n",
+       "17351     72\n",
+       "17353     36\n",
+       "17354     49\n",
+       "17373    122\n",
+       "Name: cnt, dtype: int64"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test[\"cnt\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Error\n",
+    "\n",
+    "The error is very high, which may be because the data has a few extremely high rental counts but otherwise mostly low counts. Larger errors are penalized more with MSE, which leads to a higher total error."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DecisionTreeRegressor(compute_importances=None, criterion='mse',\n",
+       "           max_depth=None, max_features=None, max_leaf_nodes=None,\n",
+       "           min_density=None, min_samples_leaf=5, min_samples_split=2,\n",
+       "           random_state=None, splitter='best')"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.tree import DecisionTreeRegressor\n",
+    "\n",
+    "reg = DecisionTreeRegressor(min_samples_leaf=5)\n",
+    "\n",
+    "reg.fit(train[predictors], train[\"cnt\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2644.2820429330714"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions = reg.predict(test[predictors])\n",
+    "\n",
+    "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2964.7288070579207"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "reg = DecisionTreeRegressor(min_samples_leaf=2)\n",
+    "\n",
+    "reg.fit(train[predictors], train[\"cnt\"])\n",
+    "\n",
+    "predictions = reg.predict(test[predictors])\n",
+    "\n",
+    "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Decision tree error\n",
+    "\n",
+    "Because it can capture nonlinear relationships between the predictors and the rental count, the decision tree regressor appears to have a much lower error than linear regression."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestRegressor(bootstrap=True, compute_importances=None,\n",
+       "           criterion='mse', max_depth=None, max_features='auto',\n",
+       "           max_leaf_nodes=None, min_density=None, min_samples_leaf=5,\n",
+       "           min_samples_split=2, n_estimators=10, n_jobs=1, oob_score=False,\n",
+       "           random_state=None, verbose=0)"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.ensemble import RandomForestRegressor\n",
+    "\n",
+    "# Seed the forest so its bootstrap samples, and therefore the reported error,\n",
+    "# are the same on every run.\n",
+    "reg = RandomForestRegressor(min_samples_leaf=5, random_state=1)\n",
+    "reg.fit(train[predictors], train[\"cnt\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1911.9827104170736"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions = reg.predict(test[predictors])\n",
+    "\n",
+    "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Random forest error\n",
+    "\n",
+    "By averaging many trees, and so removing some of the sources of overfitting, the random forest achieves a lower error than the single decision tree."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}