{ "cells": [ { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
instantdtedayseasonyrmnthhrholidayweekdayworkingdayweathersittempatemphumwindspeedcasualregisteredcnt
012011-01-01101006010.240.28790.81031316
122011-01-01101106010.220.27270.80083240
232011-01-01101206010.220.27270.80052732
342011-01-01101306010.240.28790.75031013
452011-01-01101406010.240.28790.750011
\n", "
" ], "text/plain": [ " instant dteday season yr mnth hr holiday weekday workingday \\\n", "0 1 2011-01-01 1 0 1 0 0 6 0 \n", "1 2 2011-01-01 1 0 1 1 0 6 0 \n", "2 3 2011-01-01 1 0 1 2 0 6 0 \n", "3 4 2011-01-01 1 0 1 3 0 6 0 \n", "4 5 2011-01-01 1 0 1 4 0 6 0 \n", "\n", " weathersit temp atemp hum windspeed casual registered cnt \n", "0 1 0.24 0.2879 0.81 0 3 13 16 \n", "1 1 0.22 0.2727 0.80 0 8 32 40 \n", "2 1 0.22 0.2727 0.80 0 5 27 32 \n", "3 1 0.24 0.2879 0.75 0 3 10 13 \n", "4 1 0.24 0.2879 0.75 0 0 1 1 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas\n", "\n", "bike_rentals = pandas.read_csv(\"bike_rental_hour.csv\")\n", "bike_rentals.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([ 6972., 3705., 2659., 1660., 987., 663., 369., 188.,\n", " 139., 37.]),\n", " array([ 1. , 98.6, 196.2, 293.8, 391.4, 489. , 586.6, 684.2,\n", " 781.8, 879.4, 977. ]),\n", " )" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFD5JREFUeJzt3V+MnNd93vHvI1FMaFsVK7igKJGFiWJViEVT22xFN7UR\nOnVVxkgpXUkyUIJIiNywjt0WSCz6ouJV6gRoYxmFdBH/ESXYTFmnIWhUkEUrXjRAAa+dSA0jipXY\nlqi4CZdu40hJilak+OvFnD2cEhR3lxzujDTfDzDgec97zsx5D7nz8D3vOzupKiRJArhp3AOQJE0O\nQ0GS1BkKkqTOUJAkdYaCJKkzFCRJ3ZKhkOSvJ3lh6PF6ks8kuT3JsSSvJHkuyfqhPvuTvJrkZJL7\nhuq3JTne9j12ow5KknRtspLPKSS5CZgH7gV+EfifVfVrST4H/OWqeiTJVuAbwN8B7gK+A8xUVSWZ\nAz5dVXNJngG+VFXPjviYJEnXaKXLR58ATlXVa8Au4GCrPwg80Mr3A4eq6nxVnQZOAduTbARuraq5\n1u6poT6SpAmw0lB4GDjUyhuqaqGVF4ANrXwncGaozxkGZwyX18+3eknShFh2KCRZC/wj4N9dvq8G\na1D+vgxJeodbs4K2PwP8XlX9sG0vJLmjqs62paFzrX4e2DzUbxODM4T5Vh6un7/8RZIYLpK0QlWV\nUTzPSkLhU1xaOgI4CuwBfrX9eWSo/htJ/jWD5aEZYK5daH4jyXZgDtgNfOnKLzXOXFhzEd5aV1Vv\njnEQJDlQVQfGOYZJ4Vxc4lxc4lxcMsr/TC8rFJK8l8FF5l8Yqv4CcDjJXuA08CBAVZ1Ichg4AVwA\n9tWlW5z2AU8C64BnvPNIkibLskKhqv4CeP9ldX/CICiu1P5XgF+5Qv3vAX9z5cOUJK0GP9E8uWbH\nPYAJMjvuAUyQ2XEPYILMjnsA70Yr+vDaahisjXlNQZKWK0mN6kKzZwqSpM5QkCR1hoIkqTMUJEmd\noSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTO\nUJAkdYaCJKkzFCRJnaEgSeqWFQpJ1if5ZpKXk5xIsj3J7UmOJXklyXNJ1g+135/k1SQnk9w3VL8t\nyfG277EbcUCSpGu33DOFx4Bnquoe4CeAk8AjwLGquht4vm2TZCvwELAV2Ak8niTteZ4A9lbVDDCT\nZOfIjkSSdN2WDIUktwEfq6qvAlTVhap6HdgFHGzNDgIPtPL9wKGqOl9Vp4FTwPYkG4Fbq2qutXtq\nqI8kaQIs50xhC/DDJF9L8vtJfiPJe4ENVbXQ2iwAG1r5TuDMUP8zwF1XqJ9v9ZKkCbFmmW0+DHy6\nqr6f5Iu0paJFVVVJanTDOjBU3tEekiSAJDu4QW+MywmFM8CZqvp+2/4msB84m+SOqjrblobOtf3z\nwOah/pvac8y38nD9/JVf8sAyhy9J06eqZoHZxe0kj47quZdcPqqqs8BrSe5uVZ8AXgK+BexpdXuA\nI618FHg4ydokW4AZYK49zxvtzqUAu4f6SJImwHLOFAB+Efh6krXAfwV+DrgZOJxkL3AaeBCgqk4k\nOQycAC4A+6pqcWlpH/AksI7B3UzPjug4JEkjkEvv15NhcG1inGNacxHeWldVb45xEJK0bEmqqrJ0\ny6X5iWZJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5Q\nkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSd2yQiHJ6SR/kOSF\nJHOt7vYkx5K8kuS5JOuH2u9P8mqSk0nuG6rfluR42/fY6A9HknQ9lnumUMCOqvpQVd3b6h4BjlXV\n3cDzbZskW4GHgK3ATuDxJGl9ngD2VtUMMJNk54iOQ5I0AitZP
spl27uAg618EHigle8HDlXV+ao6\nDZwCtifZCNxaVXOt3VNDfSRJE2AlZwrfSfKDJL/Q6jZU1UIrLwAbWvlO4MxQ3zPAXVeon2/1kqQJ\nsWaZ7f5eVf1xkr8CHEtycnhnVVWSGt2wDgyVd7SHJAkgyQ5u0BvjskKhqv64/fnDJL8N3AssJLmj\nqs62paFzrfk8sHmo+yYGZwjzrTxcP3/lVzyw/COQpClTVbPA7OJ2kkdH9dxLLh8leU+SW1v5vcB9\nwHHgKLCnNdsDHGnlo8DDSdYm2QLMAHNVdRZ4I8n2duF591AfSdIEWM6Zwgbgt9sNRGuAr1fVc0l+\nABxOshc4DTwIUFUnkhwGTgAXgH1Vtbi0tA94ElgHPFNVz47wWCRJ1ymX3q8nw+DaxDjHtOYivLWu\nqt4c4yAkadmSVFVdfofoNfETzZKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTO\nUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJn\nKEiSumWFQpKbk7yQ5Ftt+/Ykx5K8kuS5JOuH2u5P8mqSk0nuG6rfluR42/fY6A9FknS9lnum8Fng\nBFBt+xHgWFXdDTzftkmyFXgI2ArsBB5PktbnCWBvVc0AM0l2juYQJEmjsmQoJNkEfBL4MrD4Br8L\nONjKB4EHWvl+4FBVna+q08ApYHuSjcCtVTXX2j011EeSNCGWc6bw68AvAReH6jZU1UIrLwAbWvlO\n4MxQuzPAXVeon2/1kqQJsuZqO5P8LHCuql5IsuNKbaqqktSV9l27A0PlHe0hSQJo78c7bsRzXzUU\ngJ8EdiX5JPDjwF9K8jSwkOSOqjrblobOtfbzwOah/psYnCHMt/Jw/fzbv+yBFRyCJE2XqpoFZhe3\nkzw6que+6vJRVX2+qjZX1RbgYeB3qmo3cBTY05rtAY608lHg4SRrk2wBZoC5qjoLvJFke7vwvHuo\njyRpQix1pnC5xWWiLwCHk+wFTgMPAlTViSSHGdypdAHYV1WLffYBTwLrgGeq6tnrG7okadRy6T17\nMgyuT4xzTGsuwlvrqurNMQ5CkpYtSVVVlm65ND/RLEnqDAVJUmcoSJI6Q0GS1BkKkqRupbekTov/\ne+n3+I3HqO4kkKSVMBTe1jhvizUPJI2Hy0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNB\nktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1F01FJL8eJLvJXkxyYkk/7LV357k\nWJJXkjyXZP1Qn/1JXk1yMsl9Q/Xbkhxv+x67cYckSbpWVw2Fqvo/wMer6oPATwAfT/JR4BHgWFXd\nDTzftkmyFXgI2ArsBB7Ppe+1fALYW1UzwEySnTfigCRJ127J5aOq+t+tuBa4GfgRsAs42OoPAg+0\n8v3Aoao6X1WngVPA9iQbgVuraq61e2qojyRpQiwZCkluSvIisAB8t6peAjZU1UJrsgBsaOU7gTND\n3c8Ad12hfr7VS5ImyJqlGlTVReCDSW4Dvp3k45ftryQj/pb7A0PlHe0hSQJIsoMb9Ma4ZCgsqqrX\nk/wHYBuwkOSOqjrblobOtWbzwOahbpsYnCHMt/Jw/fzbv9qB5Q5LkqZOVc0Cs4vbSR4d1XMvdffR\n+xfvLEqyDvgHwAvAUWBPa7YHONLKR4GHk6xNsgWYAeaq6izwRpLt7cLz7qE+kqQJsdSZwkbgYJKb\nGATI01X1fJIXgMNJ9gKngQcBqupEksPACeACsK+qFpeW9gFPAuuAZ6rq2VEfjCTp+uTSe/ZkGFyf\nGOeY1lyEt24a7xhCVWXpdpI0eN8c1XuGn2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK6ZX+iWatr\n9L86ZOW8LVaaPobCxBp3J
pgH0jRy+UiS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpD\nQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVK3ZCgk2Zzku0leSvKHST7T6m9PcizJK0meS7J+\nqM/+JK8mOZnkvqH6bUmOt32P3ZhDkiRdq+WcKZwH/llV/Q3gI8A/SXIP8AhwrKruBp5v2yTZCjwE\nbAV2Ao8nWfzGlieAvVU1A8wk2TnSo5EkXZclQ6GqzlbVi63858DLwF3ALuBga3YQeKCV7wcOVdX5\nqjoNnAK2J9kI3FpVc63dU0N9JEkTYEXXFJJ8APgQ8D1gQ1UttF0LwIZWvhM4M9TtDIMQubx+vtVL\nkibEsr+jOcn7gN8CPltVf3ZpRQiqqkb7RfMHhso72kOSBJBkBzfojXFZoZDkFgaB8HRVHWnVC0nu\nqKqzbWnoXKufBzYPdd/E4AxhvpWH6+ev/IoHljl8SZo+VTULzC5uJ3l0VM+9nLuPAnwFOFFVXxza\ndRTY08p7gCND9Q8nWZtkCzADzFXVWeCNJNvbc+4e6iNJmgCpuvqqT5KPAv8R+ANgsfF+YA44DPxV\n4DTwYFX9aevzeeDngQsMlpu+3eq3AU8C64BnquozV3i9uvQy47DmIrx103jHEMb7+oMxVFWWbidp\n3JLUqH5elwyF1WYogKEgaSVGGQp+olmS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeqW/WsuNH1G+6tL\nro23xUqry1DQVYw7E8wDabW5fCRJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnq\nDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKlbMhSSfDXJQpLjQ3W3JzmW5JUkzyVZP7Rvf5JXk5xM\nct9Q/bYkx9u+x0Z/KJKk67WcM4WvATsvq3sEOFZVdwPPt22SbAUeAra2Po8nWfymlCeAvVU1A8wk\nufw5JUljtmQoVNXvAj+6rHoXcLCVDwIPtPL9wKGqOl9Vp4FTwPYkG4Fbq2qutXtqqI8kaUJc6zWF\nDVW10MoLwIZWvhM4M9TuDHDXFernW70kaYJc93c0V1WN/gveDwyVd7SHptHo/22tTFX5RdGaOEl2\ncIPeGK81FBaS3FFVZ9vS0LlWPw9sHmq3icEZwnwrD9fPv/3TH7jGYendZ5yZYB5oMlXVLDC7uJ3k\n0VE997UuHx0F9rTyHuDIUP3DSdYm2QLMAHNVdRZ4I8n2duF591AfSdKEWPJMIckh4KeA9yd5DfgX\nwBeAw0n2AqeBBwGq6kSSw8AJ4AKwr6oW/6u3D3gSWAc8U1XPjvZQJEnXK5fesyfDYA15nGNacxHe\numn8yxbj/ntxDBCvKegdIUmN6t+qn2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK66/41F9K72bh/\nzQb4qza0ugwF6arGnQnmgVaXy0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOj+nIE04P0Cn\n1WQoSBNv3JlgHkwTl48kSZ2hIEnqDAVJUmcoSJI6LzRLWtK474Dy7qfVs+qhkGQn8EXgZuDLVfWr\nqz0GSSs1zkzI2EMJpieYVnX5KMnNwL8BdgJbgU8luWc1x/DOMTvuAUyQ2XEPYILMjnsAY1JXeHz3\nbepvxGN6rPY1hXuBU1V1uqrOA78J3L/KY3iHmB33ACbI7LgHMEFmxz2ACTI77gG8K612KNwFvDa0\nfabVSZImwGpfU1jmedhPv35jh3E1b902vteWNKmm5bpGqlbvOJN8BDhQVTvb9n7g4vDF5kmYeEl6\npxlVYKx2KKwB/gvw94E/AuaAT1XVy6s2CEnS21rV5aOqupDk08C3GdyS+hUDQZImx6qeKUiSJtvE\n/JqLJDuTnEzyapLPjXs8N1qSzUm+m+SlJH+Y5DOt/vYkx5K8kuS5JOuH+uxv83MyyX3jG/3
oJbk5\nyQtJvtW2p3IeAJKsT/LNJC8nOZFk+zTORzuul5IcT/KNJD82TfOQ5KtJFpIcH6pb8fEn2dbm8NUk\njy35wlU19geDpaRTwAeAW4AXgXvGPa4bfMx3AB9s5fcxuNZyD/BrwC+3+s8BX2jlrW1ebmnzdAq4\nadzHMcL5+OfA14GjbXsq56Ed40Hg51t5DXDbtM1HO5b/BvxY2/63wJ5pmgfgY8CHgONDdSs5/sWV\noDng3lZ+Bth5tdedlDOFqftQW1WdraoXW/nPgZcZfGZjF4M3BdqfD7Ty/cChqjpfVacZ/KXfu6qD\nvkGSbAI+CXyZS9/oMnXzAJDkNuBjVfVVGFyHq6rXmb75eAM4D7yn3aDyHgY3p0zNPFTV7wI/uqx6\nJce/PclG4Naqmmvtnhrqc0WTEgpT/aG2JB9g8D+C7wEbqmqh7VoANrTynQzmZdG7aY5+Hfgl4OJQ\n3TTOA8AW4IdJvpbk95P8RpL3MmXzUVV/Avwr4H8wCIM/rapjTNk8XMFKj//y+nmWmJdJCYWpvdqd\n5H3AbwGfrao/G95Xg/O9q83NO37ekvwscK6qXuBtvvdxGuZhyBrgw8DjVfVh4C+AR4YbTMN8JPlr\nwD9lsBRyJ/C+JP94uM00zMPVLOP4r8mkhMI8sHloezP/f7q9KyW5hUEgPF1VR1r1QpI72v6NwLlW\nf/kcbWp173Q/CexK8t+BQ8BPJ3ma6ZuHRWeAM1X1/bb9TQYhcXbK5uNvA/+pqv5XVV0A/j3wd5m+\nebjcSn4uzrT6TZfVX3VeJiUUfgDMJPlAkrXAQ8DRMY/phkoS4CvAiar64tCuowwuqNH+PDJU/3CS\ntUm2ADMMLiC9o1XV56tqc1VtAR4GfqeqdjNl87Coqs4CryW5u1V9AngJ+BbTNR8ngY8kWdd+Vj4B\nnGD65uFyK/q5aP+e3mh3sAXYPdTnysZ9hX3oqvrPMLgD5xSwf9zjWYXj/SiDNfQXgRfaYydwO/Ad\n4BXgOWD9UJ/Pt/k5CfzDcR/DDZiTn+LS3UfTPA9/C/g+8J8Z/A/5tmmcD+CXGQTicQYXVW+Zpnlg\ncOb8R8CbDK65/ty1HD+wrc3hKeBLS72uH16TJHWTsnwkSZoAhoIkqTMUJEmdoSBJ6gwFSVJnKEiS\nOkNBktQZCpKk7v8BIgy2anPl5soAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "plt.hist(bike_rentals[\"cnt\"])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "instant 0.278379\n", "season 0.178056\n", "yr 0.250495\n", "mnth 0.120638\n", "hr 0.394071\n", "holiday -0.030927\n", "weekday 0.026900\n", "workingday 0.030284\n", "weathersit -0.142426\n", "temp 0.404772\n", "atemp 0.400929\n", "hum -0.322911\n", "windspeed 0.093234\n", "casual 0.694564\n", "registered 0.972151\n", "cnt 1.000000\n", "Name: cnt, dtype: float64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"# Correlate the numeric columns only: `dteday` holds date strings, which DataFrame.corr() refuses to convert on pandas >= 2.0.\n",
"bike_rentals.select_dtypes(include=\"number\").corr()[\"cnt\"]" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def assign_label(hour):\n",
"    \"\"\"Bucket an hour of the day into a coarse time-of-day label.\n",
"\n",
"    Returns 4 for hours in [0, 6), 1 for [6, 12), 2 for [12, 18)\n",
"    and 3 for [18, 24].\n",
"\n",
"    Raises:\n",
"        ValueError: if `hour` is outside the range 0..24, so bad data fails\n",
"            loudly instead of putting None into the label column.\n",
"    \"\"\"\n",
"    if 0 <= hour < 6:\n",
"        return 4\n",
"    if 6 <= hour < 12:\n",
"        return 1\n",
"    if 12 <= hour < 18:\n",
"        return 2\n",
"    if 18 <= hour <= 24:\n",
"        return 3\n",
"    raise ValueError(f\"hour must be within 0..24, got {hour!r}\")\n",
"\n",
"\n",
"bike_rentals[\"time_label\"] = bike_rentals[\"hr\"].apply(assign_label)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"## Error Metric\n",
"\n",
"Mean squared error (MSE) makes the most sense for evaluating our models. MSE works on continuous numeric data, which fits the rental counts we are predicting quite well." ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
"# Fixed seed so the 80/20 split, and every error computed from it, is reproducible across runs.\n",
"train = bike_rentals.sample(frac=.8, random_state=1)" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [
"test = bike_rentals.loc[~bike_rentals.index.isin(train.index)]" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, normalize=False)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [
"from sklearn.linear_model import LinearRegression\n",
"\n",
"# Leave out the target itself, its two components (`casual` and `registered`\n",
"# sum to `cnt` in the rows displayed at the top of the notebook) and the\n",
"# `dteday` date string, which is not numeric.\n",
"excluded_columns = {\"cnt\", \"casual\", \"registered\", \"dteday\"}\n",
"predictors = [column for column in train.columns if column not in excluded_columns]\n",
"\n",
"reg = LinearRegression()\n",
"\n",
"reg.fit(train[predictors], train[\"cnt\"])" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "16586.154698429491" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [
"import numpy\n",
"predictions = reg.predict(test[predictors])\n",
"\n",
"numpy.mean((predictions - test[\"cnt\"]) ** 2)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"## Error\n",
"\n",
"The error is very high, which may be due to the fact that the data has a few extremely high rental counts but otherwise mostly low counts. MSE squares each error, so the large misses on those high counts are penalized the most and dominate the total." ] },
{ "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeRegressor(compute_importances=None, criterion='mse',\n", " max_depth=None, max_features=None, max_leaf_nodes=None,\n", " min_density=None, min_samples_leaf=5, min_samples_split=2,\n", " random_state=None, splitter='best')" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [
"from sklearn.tree import DecisionTreeRegressor\n",
"\n",
"# Seed the tree so ties between equally good splits are broken the same way on every run.\n",
"reg = DecisionTreeRegressor(min_samples_leaf=5, random_state=1)\n",
"\n",
"reg.fit(train[predictors], train[\"cnt\"])" ] },
{ "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2644.2820429330714" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [
"predictions = reg.predict(test[predictors])\n",
"\n",
"numpy.mean((predictions - test[\"cnt\"]) ** 2)" ] },
{ "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2964.7288070579207" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Same model with smaller leaves, to see how sensitive the error is to `min_samples_leaf`.\n",
"reg = DecisionTreeRegressor(min_samples_leaf=2, random_state=1)\n",
"\n",
"reg.fit(train[predictors], train[\"cnt\"])\n",
"\n",
"predictions = reg.predict(test[predictors])\n",
"\n",
"numpy.mean((predictions - test[\"cnt\"]) ** 2)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"## Decision Tree Error\n",
"\n",
"By capturing nonlinear relationships between the predictors and the rental count, the decision tree regressor appears to have a much lower error than linear regression. In the run recorded above, lowering `min_samples_leaf` from 5 to 2 increased the error, which suggests the smaller leaves overfit."
] },
{ "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestRegressor(bootstrap=True, compute_importances=None,\n", " criterion='mse', max_depth=None, max_features='auto',\n", " max_leaf_nodes=None, min_density=None, min_samples_leaf=5,\n", " min_samples_split=2, n_estimators=10, n_jobs=1, oob_score=False,\n", " random_state=None, verbose=0)" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [
"from sklearn.ensemble import RandomForestRegressor\n",
"\n",
"# Seed the forest: bootstrap sampling makes the fit random, so without a seed the error below changes on every run.\n",
"reg = RandomForestRegressor(min_samples_leaf=5, random_state=1)\n",
"reg.fit(train[predictors], train[\"cnt\"])" ] },
{ "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1911.9827104170736" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [
"predictions = reg.predict(test[predictors])\n",
"\n",
"numpy.mean((predictions - test[\"cnt\"]) ** 2)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"## Random Forest Error\n",
"\n",
"By averaging several decision trees, each fit on a bootstrap sample of the training data, the random forest removes some of the sources of overfitting, and its error is lower than that of a single decision tree." ] }
],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 }