Queer European MD passionate about IT
Browse Source

Merge pull request #4 from dataquestio/feature/vik/projects

Add 2 projects
Vik Paruchuri 9 years ago
parent
commit
ecd2aede14
4 changed files with 6669 additions and 0 deletions
  1. 2654 0
      Mission209Solution.ipynb
  2. 2476 0
      Mission210Solution.ipynb
  3. 740 0
      Mission213Solution.ipynb
  4. 799 0
      Misson211Solution.ipynb

+ 2654 - 0
Mission209Solution.ipynb

@@ -0,0 +1,2654 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import pandas\n",
+    "\n",
+    "movies = pandas.read_csv(\"fandango_score_comparison.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>FILM</th>\n",
+       "      <th>RottenTomatoes</th>\n",
+       "      <th>RottenTomatoes_User</th>\n",
+       "      <th>Metacritic</th>\n",
+       "      <th>Metacritic_User</th>\n",
+       "      <th>IMDB</th>\n",
+       "      <th>Fandango_Stars</th>\n",
+       "      <th>Fandango_Ratingvalue</th>\n",
+       "      <th>RT_norm</th>\n",
+       "      <th>RT_user_norm</th>\n",
+       "      <th>...</th>\n",
+       "      <th>IMDB_norm</th>\n",
+       "      <th>RT_norm_round</th>\n",
+       "      <th>RT_user_norm_round</th>\n",
+       "      <th>Metacritic_norm_round</th>\n",
+       "      <th>Metacritic_user_norm_round</th>\n",
+       "      <th>IMDB_norm_round</th>\n",
+       "      <th>Metacritic_user_vote_count</th>\n",
+       "      <th>IMDB_user_vote_count</th>\n",
+       "      <th>Fandango_votes</th>\n",
+       "      <th>Fandango_Difference</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Avengers: Age of Ultron (2015)</td>\n",
+       "      <td>74</td>\n",
+       "      <td>86</td>\n",
+       "      <td>66</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>7.8</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.70</td>\n",
+       "      <td>4.30</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.90</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>1330</td>\n",
+       "      <td>271107</td>\n",
+       "      <td>14846</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Cinderella (2015)</td>\n",
+       "      <td>85</td>\n",
+       "      <td>80</td>\n",
+       "      <td>67</td>\n",
+       "      <td>7.5</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.25</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.55</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>249</td>\n",
+       "      <td>65709</td>\n",
+       "      <td>12640</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Ant-Man (2015)</td>\n",
+       "      <td>80</td>\n",
+       "      <td>90</td>\n",
+       "      <td>64</td>\n",
+       "      <td>8.1</td>\n",
+       "      <td>7.8</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>4.50</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.90</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>627</td>\n",
+       "      <td>103660</td>\n",
+       "      <td>12055</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Do You Believe? (2015)</td>\n",
+       "      <td>18</td>\n",
+       "      <td>84</td>\n",
+       "      <td>22</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>5.4</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>0.90</td>\n",
+       "      <td>4.20</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.70</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>31</td>\n",
+       "      <td>3136</td>\n",
+       "      <td>1793</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Hot Tub Time Machine 2 (2015)</td>\n",
+       "      <td>14</td>\n",
+       "      <td>28</td>\n",
+       "      <td>29</td>\n",
+       "      <td>3.4</td>\n",
+       "      <td>5.1</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>0.70</td>\n",
+       "      <td>1.40</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.55</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>88</td>\n",
+       "      <td>19560</td>\n",
+       "      <td>1021</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>The Water Diviner (2015)</td>\n",
+       "      <td>63</td>\n",
+       "      <td>62</td>\n",
+       "      <td>50</td>\n",
+       "      <td>6.8</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.15</td>\n",
+       "      <td>3.10</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.60</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>34</td>\n",
+       "      <td>39373</td>\n",
+       "      <td>397</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Irrational Man (2015)</td>\n",
+       "      <td>42</td>\n",
+       "      <td>53</td>\n",
+       "      <td>53</td>\n",
+       "      <td>7.6</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>2.10</td>\n",
+       "      <td>2.65</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.45</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>17</td>\n",
+       "      <td>2680</td>\n",
+       "      <td>252</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Top Five (2014)</td>\n",
+       "      <td>86</td>\n",
+       "      <td>64</td>\n",
+       "      <td>81</td>\n",
+       "      <td>6.8</td>\n",
+       "      <td>6.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.30</td>\n",
+       "      <td>3.20</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.25</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>124</td>\n",
+       "      <td>16876</td>\n",
+       "      <td>3223</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Shaun the Sheep Movie (2015)</td>\n",
+       "      <td>99</td>\n",
+       "      <td>82</td>\n",
+       "      <td>81</td>\n",
+       "      <td>8.8</td>\n",
+       "      <td>7.4</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.95</td>\n",
+       "      <td>4.10</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.70</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>62</td>\n",
+       "      <td>12227</td>\n",
+       "      <td>896</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Love &amp; Mercy (2015)</td>\n",
+       "      <td>89</td>\n",
+       "      <td>87</td>\n",
+       "      <td>80</td>\n",
+       "      <td>8.5</td>\n",
+       "      <td>7.8</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.45</td>\n",
+       "      <td>4.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.90</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>54</td>\n",
+       "      <td>5367</td>\n",
+       "      <td>864</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Far From The Madding Crowd (2015)</td>\n",
+       "      <td>84</td>\n",
+       "      <td>77</td>\n",
+       "      <td>71</td>\n",
+       "      <td>7.5</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.20</td>\n",
+       "      <td>3.85</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.60</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>35</td>\n",
+       "      <td>12129</td>\n",
+       "      <td>804</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>Black Sea (2015)</td>\n",
+       "      <td>82</td>\n",
+       "      <td>60</td>\n",
+       "      <td>62</td>\n",
+       "      <td>6.6</td>\n",
+       "      <td>6.4</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.10</td>\n",
+       "      <td>3.00</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.20</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>37</td>\n",
+       "      <td>16547</td>\n",
+       "      <td>218</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>Leviathan (2014)</td>\n",
+       "      <td>99</td>\n",
+       "      <td>79</td>\n",
+       "      <td>92</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>7.7</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.95</td>\n",
+       "      <td>3.95</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.85</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>145</td>\n",
+       "      <td>22521</td>\n",
+       "      <td>64</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>Unbroken (2014)</td>\n",
+       "      <td>51</td>\n",
+       "      <td>70</td>\n",
+       "      <td>59</td>\n",
+       "      <td>6.5</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>2.55</td>\n",
+       "      <td>3.50</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.60</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>218</td>\n",
+       "      <td>77518</td>\n",
+       "      <td>9443</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>The Imitation Game (2014)</td>\n",
+       "      <td>90</td>\n",
+       "      <td>92</td>\n",
+       "      <td>73</td>\n",
+       "      <td>8.2</td>\n",
+       "      <td>8.1</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.6</td>\n",
+       "      <td>4.50</td>\n",
+       "      <td>4.60</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4.05</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>566</td>\n",
+       "      <td>334164</td>\n",
+       "      <td>8055</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>Taken 3 (2015)</td>\n",
+       "      <td>9</td>\n",
+       "      <td>46</td>\n",
+       "      <td>26</td>\n",
+       "      <td>4.6</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>0.45</td>\n",
+       "      <td>2.30</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.05</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>240</td>\n",
+       "      <td>104235</td>\n",
+       "      <td>6757</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Ted 2 (2015)</td>\n",
+       "      <td>46</td>\n",
+       "      <td>58</td>\n",
+       "      <td>48</td>\n",
+       "      <td>6.5</td>\n",
+       "      <td>6.6</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>2.30</td>\n",
+       "      <td>2.90</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.30</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>197</td>\n",
+       "      <td>49102</td>\n",
+       "      <td>6437</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>Southpaw (2015)</td>\n",
+       "      <td>59</td>\n",
+       "      <td>80</td>\n",
+       "      <td>57</td>\n",
+       "      <td>8.2</td>\n",
+       "      <td>7.8</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.6</td>\n",
+       "      <td>2.95</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.90</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>128</td>\n",
+       "      <td>23561</td>\n",
+       "      <td>5597</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>Night at the Museum: Secret of the Tomb (2014)</td>\n",
+       "      <td>50</td>\n",
+       "      <td>58</td>\n",
+       "      <td>47</td>\n",
+       "      <td>5.8</td>\n",
+       "      <td>6.3</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>2.50</td>\n",
+       "      <td>2.90</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.15</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>103</td>\n",
+       "      <td>50291</td>\n",
+       "      <td>5445</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>Pixels (2015)</td>\n",
+       "      <td>17</td>\n",
+       "      <td>54</td>\n",
+       "      <td>27</td>\n",
+       "      <td>5.3</td>\n",
+       "      <td>5.6</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>0.85</td>\n",
+       "      <td>2.70</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.80</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>246</td>\n",
+       "      <td>19521</td>\n",
+       "      <td>3886</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>McFarland, USA (2015)</td>\n",
+       "      <td>79</td>\n",
+       "      <td>89</td>\n",
+       "      <td>60</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>7.5</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.6</td>\n",
+       "      <td>3.95</td>\n",
+       "      <td>4.45</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.75</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>59</td>\n",
+       "      <td>13769</td>\n",
+       "      <td>3364</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Insidious: Chapter 3 (2015)</td>\n",
+       "      <td>59</td>\n",
+       "      <td>56</td>\n",
+       "      <td>52</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>6.3</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>2.95</td>\n",
+       "      <td>2.80</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.15</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>115</td>\n",
+       "      <td>25134</td>\n",
+       "      <td>3276</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>The Man From U.N.C.L.E. (2015)</td>\n",
+       "      <td>68</td>\n",
+       "      <td>80</td>\n",
+       "      <td>55</td>\n",
+       "      <td>7.9</td>\n",
+       "      <td>7.6</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>3.40</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.80</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>144</td>\n",
+       "      <td>22104</td>\n",
+       "      <td>2686</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>Run All Night (2015)</td>\n",
+       "      <td>60</td>\n",
+       "      <td>59</td>\n",
+       "      <td>59</td>\n",
+       "      <td>7.3</td>\n",
+       "      <td>6.6</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>3.00</td>\n",
+       "      <td>2.95</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.30</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>141</td>\n",
+       "      <td>50438</td>\n",
+       "      <td>2066</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Trainwreck (2015)</td>\n",
+       "      <td>85</td>\n",
+       "      <td>74</td>\n",
+       "      <td>75</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6.7</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>4.25</td>\n",
+       "      <td>3.70</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.35</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>169</td>\n",
+       "      <td>27380</td>\n",
+       "      <td>8381</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>Selma (2014)</td>\n",
+       "      <td>99</td>\n",
+       "      <td>86</td>\n",
+       "      <td>89</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>7.5</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.6</td>\n",
+       "      <td>4.95</td>\n",
+       "      <td>4.30</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.75</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>316</td>\n",
+       "      <td>45344</td>\n",
+       "      <td>7025</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>Ex Machina (2015)</td>\n",
+       "      <td>92</td>\n",
+       "      <td>86</td>\n",
+       "      <td>78</td>\n",
+       "      <td>7.9</td>\n",
+       "      <td>7.7</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>4.60</td>\n",
+       "      <td>4.30</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.85</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>672</td>\n",
+       "      <td>154499</td>\n",
+       "      <td>3458</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>Still Alice (2015)</td>\n",
+       "      <td>88</td>\n",
+       "      <td>85</td>\n",
+       "      <td>72</td>\n",
+       "      <td>7.8</td>\n",
+       "      <td>7.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>4.40</td>\n",
+       "      <td>4.25</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.75</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>153</td>\n",
+       "      <td>57123</td>\n",
+       "      <td>1258</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>Wild Tales (2014)</td>\n",
+       "      <td>96</td>\n",
+       "      <td>92</td>\n",
+       "      <td>77</td>\n",
+       "      <td>8.8</td>\n",
+       "      <td>8.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>4.80</td>\n",
+       "      <td>4.60</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4.10</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>107</td>\n",
+       "      <td>50285</td>\n",
+       "      <td>235</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>The End of the Tour (2015)</td>\n",
+       "      <td>92</td>\n",
+       "      <td>89</td>\n",
+       "      <td>84</td>\n",
+       "      <td>7.5</td>\n",
+       "      <td>7.9</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>4.60</td>\n",
+       "      <td>4.45</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.95</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1320</td>\n",
+       "      <td>121</td>\n",
+       "      <td>0.4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>116</th>\n",
+       "      <td>Clouds of Sils Maria (2015)</td>\n",
+       "      <td>89</td>\n",
+       "      <td>67</td>\n",
+       "      <td>78</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>6.8</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.4</td>\n",
+       "      <td>4.45</td>\n",
+       "      <td>3.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.40</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>36</td>\n",
+       "      <td>11392</td>\n",
+       "      <td>162</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>117</th>\n",
+       "      <td>Testament of Youth (2015)</td>\n",
+       "      <td>81</td>\n",
+       "      <td>79</td>\n",
+       "      <td>77</td>\n",
+       "      <td>7.9</td>\n",
+       "      <td>7.3</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.9</td>\n",
+       "      <td>4.05</td>\n",
+       "      <td>3.95</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.65</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>15</td>\n",
+       "      <td>5495</td>\n",
+       "      <td>127</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>118</th>\n",
+       "      <td>Infinitely Polar Bear (2015)</td>\n",
+       "      <td>80</td>\n",
+       "      <td>76</td>\n",
+       "      <td>64</td>\n",
+       "      <td>7.9</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.9</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>3.80</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.60</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1062</td>\n",
+       "      <td>124</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>119</th>\n",
+       "      <td>Phoenix (2015)</td>\n",
+       "      <td>99</td>\n",
+       "      <td>81</td>\n",
+       "      <td>91</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.4</td>\n",
+       "      <td>4.95</td>\n",
+       "      <td>4.05</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.60</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>21</td>\n",
+       "      <td>3687</td>\n",
+       "      <td>70</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>120</th>\n",
+       "      <td>The Wolfpack (2015)</td>\n",
+       "      <td>84</td>\n",
+       "      <td>73</td>\n",
+       "      <td>75</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.4</td>\n",
+       "      <td>4.20</td>\n",
+       "      <td>3.65</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.55</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1488</td>\n",
+       "      <td>66</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>121</th>\n",
+       "      <td>The Stanford Prison Experiment (2015)</td>\n",
+       "      <td>84</td>\n",
+       "      <td>87</td>\n",
+       "      <td>68</td>\n",
+       "      <td>8.5</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.9</td>\n",
+       "      <td>4.20</td>\n",
+       "      <td>4.35</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.55</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>950</td>\n",
+       "      <td>51</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>122</th>\n",
+       "      <td>Tangerine (2015)</td>\n",
+       "      <td>95</td>\n",
+       "      <td>86</td>\n",
+       "      <td>86</td>\n",
+       "      <td>7.3</td>\n",
+       "      <td>7.4</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.9</td>\n",
+       "      <td>4.75</td>\n",
+       "      <td>4.30</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.70</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>14</td>\n",
+       "      <td>696</td>\n",
+       "      <td>36</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>123</th>\n",
+       "      <td>Magic Mike XXL (2015)</td>\n",
+       "      <td>62</td>\n",
+       "      <td>64</td>\n",
+       "      <td>60</td>\n",
+       "      <td>5.4</td>\n",
+       "      <td>6.3</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>3.10</td>\n",
+       "      <td>3.20</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.15</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>52</td>\n",
+       "      <td>11937</td>\n",
+       "      <td>9363</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>124</th>\n",
+       "      <td>Home (2015)</td>\n",
+       "      <td>45</td>\n",
+       "      <td>65</td>\n",
+       "      <td>55</td>\n",
+       "      <td>7.3</td>\n",
+       "      <td>6.7</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>2.25</td>\n",
+       "      <td>3.25</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.35</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>177</td>\n",
+       "      <td>41158</td>\n",
+       "      <td>7705</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>125</th>\n",
+       "      <td>The Wedding Ringer (2015)</td>\n",
+       "      <td>27</td>\n",
+       "      <td>66</td>\n",
+       "      <td>35</td>\n",
+       "      <td>3.3</td>\n",
+       "      <td>6.7</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>1.35</td>\n",
+       "      <td>3.30</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.35</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>126</td>\n",
+       "      <td>37292</td>\n",
+       "      <td>6506</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>126</th>\n",
+       "      <td>Woman in Gold (2015)</td>\n",
+       "      <td>52</td>\n",
+       "      <td>81</td>\n",
+       "      <td>51</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>7.4</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>2.60</td>\n",
+       "      <td>4.05</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.70</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>72</td>\n",
+       "      <td>17957</td>\n",
+       "      <td>2435</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>127</th>\n",
+       "      <td>The Last Five Years (2015)</td>\n",
+       "      <td>60</td>\n",
+       "      <td>60</td>\n",
+       "      <td>60</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>3.00</td>\n",
+       "      <td>3.00</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.00</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>20</td>\n",
+       "      <td>4110</td>\n",
+       "      <td>99</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>128</th>\n",
+       "      <td>Mission: Impossible – Rogue Nation (2015)</td>\n",
+       "      <td>92</td>\n",
+       "      <td>90</td>\n",
+       "      <td>75</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>7.8</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>4.60</td>\n",
+       "      <td>4.50</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.90</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>362</td>\n",
+       "      <td>82579</td>\n",
+       "      <td>8357</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>129</th>\n",
+       "      <td>Amy (2015)</td>\n",
+       "      <td>97</td>\n",
+       "      <td>91</td>\n",
+       "      <td>85</td>\n",
+       "      <td>8.8</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>4.85</td>\n",
+       "      <td>4.55</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>60</td>\n",
+       "      <td>5630</td>\n",
+       "      <td>729</td>\n",
+       "      <td>0.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>130</th>\n",
+       "      <td>Jurassic World (2015)</td>\n",
+       "      <td>71</td>\n",
+       "      <td>81</td>\n",
+       "      <td>59</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>7.3</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.55</td>\n",
+       "      <td>4.05</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.65</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>1281</td>\n",
+       "      <td>241807</td>\n",
+       "      <td>34390</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>131</th>\n",
+       "      <td>Minions (2015)</td>\n",
+       "      <td>54</td>\n",
+       "      <td>52</td>\n",
+       "      <td>56</td>\n",
+       "      <td>5.7</td>\n",
+       "      <td>6.7</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>2.70</td>\n",
+       "      <td>2.60</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.35</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>204</td>\n",
+       "      <td>55895</td>\n",
+       "      <td>14998</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>132</th>\n",
+       "      <td>Max (2015)</td>\n",
+       "      <td>35</td>\n",
+       "      <td>73</td>\n",
+       "      <td>47</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>1.75</td>\n",
+       "      <td>3.65</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.50</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>15</td>\n",
+       "      <td>5444</td>\n",
+       "      <td>3412</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>133</th>\n",
+       "      <td>Paul Blart: Mall Cop 2 (2015)</td>\n",
+       "      <td>5</td>\n",
+       "      <td>36</td>\n",
+       "      <td>13</td>\n",
+       "      <td>2.4</td>\n",
+       "      <td>4.3</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>0.25</td>\n",
+       "      <td>1.80</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.15</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>211</td>\n",
+       "      <td>15004</td>\n",
+       "      <td>3054</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>134</th>\n",
+       "      <td>The Longest Ride (2015)</td>\n",
+       "      <td>31</td>\n",
+       "      <td>73</td>\n",
+       "      <td>33</td>\n",
+       "      <td>4.8</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>1.55</td>\n",
+       "      <td>3.65</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.60</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>49</td>\n",
+       "      <td>25214</td>\n",
+       "      <td>2603</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>135</th>\n",
+       "      <td>The Lazarus Effect (2015)</td>\n",
+       "      <td>14</td>\n",
+       "      <td>23</td>\n",
+       "      <td>31</td>\n",
+       "      <td>4.9</td>\n",
+       "      <td>5.2</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>0.70</td>\n",
+       "      <td>1.15</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.60</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>62</td>\n",
+       "      <td>17691</td>\n",
+       "      <td>1651</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>136</th>\n",
+       "      <td>The Woman In Black 2 Angel of Death (2015)</td>\n",
+       "      <td>22</td>\n",
+       "      <td>25</td>\n",
+       "      <td>42</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>4.9</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.10</td>\n",
+       "      <td>1.25</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.45</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>55</td>\n",
+       "      <td>14873</td>\n",
+       "      <td>1333</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>137</th>\n",
+       "      <td>Danny Collins (2015)</td>\n",
+       "      <td>77</td>\n",
+       "      <td>75</td>\n",
+       "      <td>58</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.85</td>\n",
+       "      <td>3.75</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.55</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>33</td>\n",
+       "      <td>11206</td>\n",
+       "      <td>531</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>138</th>\n",
+       "      <td>Spare Parts (2015)</td>\n",
+       "      <td>52</td>\n",
+       "      <td>83</td>\n",
+       "      <td>50</td>\n",
+       "      <td>7.1</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>2.60</td>\n",
+       "      <td>4.15</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.60</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>47377</td>\n",
+       "      <td>450</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>139</th>\n",
+       "      <td>Serena (2015)</td>\n",
+       "      <td>18</td>\n",
+       "      <td>25</td>\n",
+       "      <td>36</td>\n",
+       "      <td>5.3</td>\n",
+       "      <td>5.4</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>0.90</td>\n",
+       "      <td>1.25</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.70</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>12165</td>\n",
+       "      <td>50</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>140</th>\n",
+       "      <td>Inside Out (2015)</td>\n",
+       "      <td>98</td>\n",
+       "      <td>90</td>\n",
+       "      <td>94</td>\n",
+       "      <td>8.9</td>\n",
+       "      <td>8.6</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.90</td>\n",
+       "      <td>4.50</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4.30</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>807</td>\n",
+       "      <td>96252</td>\n",
+       "      <td>15749</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>141</th>\n",
+       "      <td>Mr. Holmes (2015)</td>\n",
+       "      <td>87</td>\n",
+       "      <td>78</td>\n",
+       "      <td>67</td>\n",
+       "      <td>7.9</td>\n",
+       "      <td>7.4</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.35</td>\n",
+       "      <td>3.90</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.70</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>33</td>\n",
+       "      <td>7367</td>\n",
+       "      <td>1348</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>142</th>\n",
+       "      <td>'71 (2015)</td>\n",
+       "      <td>97</td>\n",
+       "      <td>82</td>\n",
+       "      <td>83</td>\n",
+       "      <td>7.5</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.85</td>\n",
+       "      <td>4.10</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.60</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>60</td>\n",
+       "      <td>24116</td>\n",
+       "      <td>192</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>143</th>\n",
+       "      <td>Two Days, One Night (2014)</td>\n",
+       "      <td>97</td>\n",
+       "      <td>78</td>\n",
+       "      <td>89</td>\n",
+       "      <td>8.8</td>\n",
+       "      <td>7.4</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.85</td>\n",
+       "      <td>3.90</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.70</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>123</td>\n",
+       "      <td>24345</td>\n",
+       "      <td>118</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>144</th>\n",
+       "      <td>Gett: The Trial of Viviane Amsalem (2015)</td>\n",
+       "      <td>100</td>\n",
+       "      <td>81</td>\n",
+       "      <td>90</td>\n",
+       "      <td>7.3</td>\n",
+       "      <td>7.8</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>5.00</td>\n",
+       "      <td>4.05</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.90</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1955</td>\n",
+       "      <td>59</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>145</th>\n",
+       "      <td>Kumiko, The Treasure Hunter (2015)</td>\n",
+       "      <td>87</td>\n",
+       "      <td>63</td>\n",
+       "      <td>68</td>\n",
+       "      <td>6.4</td>\n",
+       "      <td>6.7</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>4.35</td>\n",
+       "      <td>3.15</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.35</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>5289</td>\n",
+       "      <td>41</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>146 rows × 22 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                               FILM  RottenTomatoes  \\\n",
+       "0                    Avengers: Age of Ultron (2015)              74   \n",
+       "1                                 Cinderella (2015)              85   \n",
+       "2                                    Ant-Man (2015)              80   \n",
+       "3                            Do You Believe? (2015)              18   \n",
+       "4                     Hot Tub Time Machine 2 (2015)              14   \n",
+       "5                          The Water Diviner (2015)              63   \n",
+       "6                             Irrational Man (2015)              42   \n",
+       "7                                   Top Five (2014)              86   \n",
+       "8                      Shaun the Sheep Movie (2015)              99   \n",
+       "9                               Love & Mercy (2015)              89   \n",
+       "10                Far From The Madding Crowd (2015)              84   \n",
+       "11                                 Black Sea (2015)              82   \n",
+       "12                                 Leviathan (2014)              99   \n",
+       "13                                  Unbroken (2014)              51   \n",
+       "14                        The Imitation Game (2014)              90   \n",
+       "15                                   Taken 3 (2015)               9   \n",
+       "16                                     Ted 2 (2015)              46   \n",
+       "17                                  Southpaw (2015)              59   \n",
+       "18   Night at the Museum: Secret of the Tomb (2014)              50   \n",
+       "19                                    Pixels (2015)              17   \n",
+       "20                            McFarland, USA (2015)              79   \n",
+       "21                      Insidious: Chapter 3 (2015)              59   \n",
+       "22                   The Man From U.N.C.L.E. (2015)              68   \n",
+       "23                             Run All Night (2015)              60   \n",
+       "24                                Trainwreck (2015)              85   \n",
+       "25                                     Selma (2014)              99   \n",
+       "26                                Ex Machina (2015)              92   \n",
+       "27                               Still Alice (2015)              88   \n",
+       "28                                Wild Tales (2014)              96   \n",
+       "29                       The End of the Tour (2015)              92   \n",
+       "..                                              ...             ...   \n",
+       "116                     Clouds of Sils Maria (2015)              89   \n",
+       "117                       Testament of Youth (2015)              81   \n",
+       "118                    Infinitely Polar Bear (2015)              80   \n",
+       "119                                  Phoenix (2015)              99   \n",
+       "120                             The Wolfpack (2015)              84   \n",
+       "121           The Stanford Prison Experiment (2015)              84   \n",
+       "122                                Tangerine (2015)              95   \n",
+       "123                           Magic Mike XXL (2015)              62   \n",
+       "124                                     Home (2015)              45   \n",
+       "125                       The Wedding Ringer (2015)              27   \n",
+       "126                            Woman in Gold (2015)              52   \n",
+       "127                      The Last Five Years (2015)              60   \n",
+       "128     Mission: Impossible – Rogue Nation (2015)              92   \n",
+       "129                                      Amy (2015)              97   \n",
+       "130                           Jurassic World (2015)              71   \n",
+       "131                                  Minions (2015)              54   \n",
+       "132                                      Max (2015)              35   \n",
+       "133                   Paul Blart: Mall Cop 2 (2015)               5   \n",
+       "134                         The Longest Ride (2015)              31   \n",
+       "135                       The Lazarus Effect (2015)              14   \n",
+       "136      The Woman In Black 2 Angel of Death (2015)              22   \n",
+       "137                            Danny Collins (2015)              77   \n",
+       "138                              Spare Parts (2015)              52   \n",
+       "139                                   Serena (2015)              18   \n",
+       "140                               Inside Out (2015)              98   \n",
+       "141                               Mr. Holmes (2015)              87   \n",
+       "142                                      '71 (2015)              97   \n",
+       "143                      Two Days, One Night (2014)              97   \n",
+       "144       Gett: The Trial of Viviane Amsalem (2015)             100   \n",
+       "145              Kumiko, The Treasure Hunter (2015)              87   \n",
+       "\n",
+       "     RottenTomatoes_User  Metacritic  Metacritic_User  IMDB  Fandango_Stars  \\\n",
+       "0                     86          66              7.1   7.8             5.0   \n",
+       "1                     80          67              7.5   7.1             5.0   \n",
+       "2                     90          64              8.1   7.8             5.0   \n",
+       "3                     84          22              4.7   5.4             5.0   \n",
+       "4                     28          29              3.4   5.1             3.5   \n",
+       "5                     62          50              6.8   7.2             4.5   \n",
+       "6                     53          53              7.6   6.9             4.0   \n",
+       "7                     64          81              6.8   6.5             4.0   \n",
+       "8                     82          81              8.8   7.4             4.5   \n",
+       "9                     87          80              8.5   7.8             4.5   \n",
+       "10                    77          71              7.5   7.2             4.5   \n",
+       "11                    60          62              6.6   6.4             4.0   \n",
+       "12                    79          92              7.2   7.7             4.0   \n",
+       "13                    70          59              6.5   7.2             4.5   \n",
+       "14                    92          73              8.2   8.1             5.0   \n",
+       "15                    46          26              4.6   6.1             4.5   \n",
+       "16                    58          48              6.5   6.6             4.5   \n",
+       "17                    80          57              8.2   7.8             5.0   \n",
+       "18                    58          47              5.8   6.3             4.5   \n",
+       "19                    54          27              5.3   5.6             4.5   \n",
+       "20                    89          60              7.2   7.5             5.0   \n",
+       "21                    56          52              6.9   6.3             4.5   \n",
+       "22                    80          55              7.9   7.6             4.5   \n",
+       "23                    59          59              7.3   6.6             4.5   \n",
+       "24                    74          75              6.0   6.7             4.5   \n",
+       "25                    86          89              7.1   7.5             5.0   \n",
+       "26                    86          78              7.9   7.7             4.5   \n",
+       "27                    85          72              7.8   7.5             4.5   \n",
+       "28                    92          77              8.8   8.2             4.5   \n",
+       "29                    89          84              7.5   7.9             4.5   \n",
+       "..                   ...         ...              ...   ...             ...   \n",
+       "116                   67          78              7.1   6.8             3.5   \n",
+       "117                   79          77              7.9   7.3             4.0   \n",
+       "118                   76          64              7.9   7.2             4.0   \n",
+       "119                   81          91              8.0   7.2             3.5   \n",
+       "120                   73          75              7.0   7.1             3.5   \n",
+       "121                   87          68              8.5   7.1             4.0   \n",
+       "122                   86          86              7.3   7.4             4.0   \n",
+       "123                   64          60              5.4   6.3             4.5   \n",
+       "124                   65          55              7.3   6.7             4.5   \n",
+       "125                   66          35              3.3   6.7             4.5   \n",
+       "126                   81          51              7.2   7.4             4.5   \n",
+       "127                   60          60              6.9   6.0             4.5   \n",
+       "128                   90          75              8.0   7.8             4.5   \n",
+       "129                   91          85              8.8   8.0             4.5   \n",
+       "130                   81          59              7.0   7.3             4.5   \n",
+       "131                   52          56              5.7   6.7             4.0   \n",
+       "132                   73          47              5.9   7.0             4.5   \n",
+       "133                   36          13              2.4   4.3             3.5   \n",
+       "134                   73          33              4.8   7.2             4.5   \n",
+       "135                   23          31              4.9   5.2             3.0   \n",
+       "136                   25          42              4.4   4.9             3.0   \n",
+       "137                   75          58              7.1   7.1             4.0   \n",
+       "138                   83          50              7.1   7.2             4.5   \n",
+       "139                   25          36              5.3   5.4             3.0   \n",
+       "140                   90          94              8.9   8.6             4.5   \n",
+       "141                   78          67              7.9   7.4             4.0   \n",
+       "142                   82          83              7.5   7.2             3.5   \n",
+       "143                   78          89              8.8   7.4             3.5   \n",
+       "144                   81          90              7.3   7.8             3.5   \n",
+       "145                   63          68              6.4   6.7             3.5   \n",
+       "\n",
+       "     Fandango_Ratingvalue  RT_norm  RT_user_norm         ...           \\\n",
+       "0                     4.5     3.70          4.30         ...            \n",
+       "1                     4.5     4.25          4.00         ...            \n",
+       "2                     4.5     4.00          4.50         ...            \n",
+       "3                     4.5     0.90          4.20         ...            \n",
+       "4                     3.0     0.70          1.40         ...            \n",
+       "5                     4.0     3.15          3.10         ...            \n",
+       "6                     3.5     2.10          2.65         ...            \n",
+       "7                     3.5     4.30          3.20         ...            \n",
+       "8                     4.0     4.95          4.10         ...            \n",
+       "9                     4.0     4.45          4.35         ...            \n",
+       "10                    4.0     4.20          3.85         ...            \n",
+       "11                    3.5     4.10          3.00         ...            \n",
+       "12                    3.5     4.95          3.95         ...            \n",
+       "13                    4.1     2.55          3.50         ...            \n",
+       "14                    4.6     4.50          4.60         ...            \n",
+       "15                    4.1     0.45          2.30         ...            \n",
+       "16                    4.1     2.30          2.90         ...            \n",
+       "17                    4.6     2.95          4.00         ...            \n",
+       "18                    4.1     2.50          2.90         ...            \n",
+       "19                    4.1     0.85          2.70         ...            \n",
+       "20                    4.6     3.95          4.45         ...            \n",
+       "21                    4.1     2.95          2.80         ...            \n",
+       "22                    4.1     3.40          4.00         ...            \n",
+       "23                    4.1     3.00          2.95         ...            \n",
+       "24                    4.1     4.25          3.70         ...            \n",
+       "25                    4.6     4.95          4.30         ...            \n",
+       "26                    4.1     4.60          4.30         ...            \n",
+       "27                    4.1     4.40          4.25         ...            \n",
+       "28                    4.1     4.80          4.60         ...            \n",
+       "29                    4.1     4.60          4.45         ...            \n",
+       "..                    ...      ...           ...         ...            \n",
+       "116                   3.4     4.45          3.35         ...            \n",
+       "117                   3.9     4.05          3.95         ...            \n",
+       "118                   3.9     4.00          3.80         ...            \n",
+       "119                   3.4     4.95          4.05         ...            \n",
+       "120                   3.4     4.20          3.65         ...            \n",
+       "121                   3.9     4.20          4.35         ...            \n",
+       "122                   3.9     4.75          4.30         ...            \n",
+       "123                   4.4     3.10          3.20         ...            \n",
+       "124                   4.4     2.25          3.25         ...            \n",
+       "125                   4.4     1.35          3.30         ...            \n",
+       "126                   4.4     2.60          4.05         ...            \n",
+       "127                   4.4     3.00          3.00         ...            \n",
+       "128                   4.4     4.60          4.50         ...            \n",
+       "129                   4.4     4.85          4.55         ...            \n",
+       "130                   4.5     3.55          4.05         ...            \n",
+       "131                   4.0     2.70          2.60         ...            \n",
+       "132                   4.5     1.75          3.65         ...            \n",
+       "133                   3.5     0.25          1.80         ...            \n",
+       "134                   4.5     1.55          3.65         ...            \n",
+       "135                   3.0     0.70          1.15         ...            \n",
+       "136                   3.0     1.10          1.25         ...            \n",
+       "137                   4.0     3.85          3.75         ...            \n",
+       "138                   4.5     2.60          4.15         ...            \n",
+       "139                   3.0     0.90          1.25         ...            \n",
+       "140                   4.5     4.90          4.50         ...            \n",
+       "141                   4.0     4.35          3.90         ...            \n",
+       "142                   3.5     4.85          4.10         ...            \n",
+       "143                   3.5     4.85          3.90         ...            \n",
+       "144                   3.5     5.00          4.05         ...            \n",
+       "145                   3.5     4.35          3.15         ...            \n",
+       "\n",
+       "     IMDB_norm  RT_norm_round  RT_user_norm_round  Metacritic_norm_round  \\\n",
+       "0         3.90            3.5                 4.5                    3.5   \n",
+       "1         3.55            4.5                 4.0                    3.5   \n",
+       "2         3.90            4.0                 4.5                    3.0   \n",
+       "3         2.70            1.0                 4.0                    1.0   \n",
+       "4         2.55            0.5                 1.5                    1.5   \n",
+       "5         3.60            3.0                 3.0                    2.5   \n",
+       "6         3.45            2.0                 2.5                    2.5   \n",
+       "7         3.25            4.5                 3.0                    4.0   \n",
+       "8         3.70            5.0                 4.0                    4.0   \n",
+       "9         3.90            4.5                 4.5                    4.0   \n",
+       "10        3.60            4.0                 4.0                    3.5   \n",
+       "11        3.20            4.0                 3.0                    3.0   \n",
+       "12        3.85            5.0                 4.0                    4.5   \n",
+       "13        3.60            2.5                 3.5                    3.0   \n",
+       "14        4.05            4.5                 4.5                    3.5   \n",
+       "15        3.05            0.5                 2.5                    1.5   \n",
+       "16        3.30            2.5                 3.0                    2.5   \n",
+       "17        3.90            3.0                 4.0                    3.0   \n",
+       "18        3.15            2.5                 3.0                    2.5   \n",
+       "19        2.80            1.0                 2.5                    1.5   \n",
+       "20        3.75            4.0                 4.5                    3.0   \n",
+       "21        3.15            3.0                 3.0                    2.5   \n",
+       "22        3.80            3.5                 4.0                    3.0   \n",
+       "23        3.30            3.0                 3.0                    3.0   \n",
+       "24        3.35            4.5                 3.5                    4.0   \n",
+       "25        3.75            5.0                 4.5                    4.5   \n",
+       "26        3.85            4.5                 4.5                    4.0   \n",
+       "27        3.75            4.5                 4.5                    3.5   \n",
+       "28        4.10            5.0                 4.5                    4.0   \n",
+       "29        3.95            4.5                 4.5                    4.0   \n",
+       "..         ...            ...                 ...                    ...   \n",
+       "116       3.40            4.5                 3.5                    4.0   \n",
+       "117       3.65            4.0                 4.0                    4.0   \n",
+       "118       3.60            4.0                 4.0                    3.0   \n",
+       "119       3.60            5.0                 4.0                    4.5   \n",
+       "120       3.55            4.0                 3.5                    4.0   \n",
+       "121       3.55            4.0                 4.5                    3.5   \n",
+       "122       3.70            5.0                 4.5                    4.5   \n",
+       "123       3.15            3.0                 3.0                    3.0   \n",
+       "124       3.35            2.5                 3.5                    3.0   \n",
+       "125       3.35            1.5                 3.5                    2.0   \n",
+       "126       3.70            2.5                 4.0                    2.5   \n",
+       "127       3.00            3.0                 3.0                    3.0   \n",
+       "128       3.90            4.5                 4.5                    4.0   \n",
+       "129       4.00            5.0                 4.5                    4.5   \n",
+       "130       3.65            3.5                 4.0                    3.0   \n",
+       "131       3.35            2.5                 2.5                    3.0   \n",
+       "132       3.50            2.0                 3.5                    2.5   \n",
+       "133       2.15            0.5                 2.0                    0.5   \n",
+       "134       3.60            1.5                 3.5                    1.5   \n",
+       "135       2.60            0.5                 1.0                    1.5   \n",
+       "136       2.45            1.0                 1.5                    2.0   \n",
+       "137       3.55            4.0                 4.0                    3.0   \n",
+       "138       3.60            2.5                 4.0                    2.5   \n",
+       "139       2.70            1.0                 1.5                    2.0   \n",
+       "140       4.30            5.0                 4.5                    4.5   \n",
+       "141       3.70            4.5                 4.0                    3.5   \n",
+       "142       3.60            5.0                 4.0                    4.0   \n",
+       "143       3.70            5.0                 4.0                    4.5   \n",
+       "144       3.90            5.0                 4.0                    4.5   \n",
+       "145       3.35            4.5                 3.0                    3.5   \n",
+       "\n",
+       "     Metacritic_user_norm_round  IMDB_norm_round  Metacritic_user_vote_count  \\\n",
+       "0                           3.5              4.0                        1330   \n",
+       "1                           4.0              3.5                         249   \n",
+       "2                           4.0              4.0                         627   \n",
+       "3                           2.5              2.5                          31   \n",
+       "4                           1.5              2.5                          88   \n",
+       "5                           3.5              3.5                          34   \n",
+       "6                           4.0              3.5                          17   \n",
+       "7                           3.5              3.5                         124   \n",
+       "8                           4.5              3.5                          62   \n",
+       "9                           4.5              4.0                          54   \n",
+       "10                          4.0              3.5                          35   \n",
+       "11                          3.5              3.0                          37   \n",
+       "12                          3.5              4.0                         145   \n",
+       "13                          3.5              3.5                         218   \n",
+       "14                          4.0              4.0                         566   \n",
+       "15                          2.5              3.0                         240   \n",
+       "16                          3.5              3.5                         197   \n",
+       "17                          4.0              4.0                         128   \n",
+       "18                          3.0              3.0                         103   \n",
+       "19                          2.5              3.0                         246   \n",
+       "20                          3.5              4.0                          59   \n",
+       "21                          3.5              3.0                         115   \n",
+       "22                          4.0              4.0                         144   \n",
+       "23                          3.5              3.5                         141   \n",
+       "24                          3.0              3.5                         169   \n",
+       "25                          3.5              4.0                         316   \n",
+       "26                          4.0              4.0                         672   \n",
+       "27                          4.0              4.0                         153   \n",
+       "28                          4.5              4.0                         107   \n",
+       "29                          4.0              4.0                          19   \n",
+       "..                          ...              ...                         ...   \n",
+       "116                         3.5              3.5                          36   \n",
+       "117                         4.0              3.5                          15   \n",
+       "118                         4.0              3.5                           8   \n",
+       "119                         4.0              3.5                          21   \n",
+       "120                         3.5              3.5                           8   \n",
+       "121                         4.5              3.5                           6   \n",
+       "122                         3.5              3.5                          14   \n",
+       "123                         2.5              3.0                          52   \n",
+       "124                         3.5              3.5                         177   \n",
+       "125                         1.5              3.5                         126   \n",
+       "126                         3.5              3.5                          72   \n",
+       "127                         3.5              3.0                          20   \n",
+       "128                         4.0              4.0                         362   \n",
+       "129                         4.5              4.0                          60   \n",
+       "130                         3.5              3.5                        1281   \n",
+       "131                         3.0              3.5                         204   \n",
+       "132                         3.0              3.5                          15   \n",
+       "133                         1.0              2.0                         211   \n",
+       "134                         2.5              3.5                          49   \n",
+       "135                         2.5              2.5                          62   \n",
+       "136                         2.0              2.5                          55   \n",
+       "137                         3.5              3.5                          33   \n",
+       "138                         3.5              3.5                           7   \n",
+       "139                         2.5              2.5                          19   \n",
+       "140                         4.5              4.5                         807   \n",
+       "141                         4.0              3.5                          33   \n",
+       "142                         4.0              3.5                          60   \n",
+       "143                         4.5              3.5                         123   \n",
+       "144                         3.5              4.0                          19   \n",
+       "145                         3.0              3.5                          19   \n",
+       "\n",
+       "     IMDB_user_vote_count  Fandango_votes  Fandango_Difference  \n",
+       "0                  271107           14846                  0.5  \n",
+       "1                   65709           12640                  0.5  \n",
+       "2                  103660           12055                  0.5  \n",
+       "3                    3136            1793                  0.5  \n",
+       "4                   19560            1021                  0.5  \n",
+       "5                   39373             397                  0.5  \n",
+       "6                    2680             252                  0.5  \n",
+       "7                   16876            3223                  0.5  \n",
+       "8                   12227             896                  0.5  \n",
+       "9                    5367             864                  0.5  \n",
+       "10                  12129             804                  0.5  \n",
+       "11                  16547             218                  0.5  \n",
+       "12                  22521              64                  0.5  \n",
+       "13                  77518            9443                  0.4  \n",
+       "14                 334164            8055                  0.4  \n",
+       "15                 104235            6757                  0.4  \n",
+       "16                  49102            6437                  0.4  \n",
+       "17                  23561            5597                  0.4  \n",
+       "18                  50291            5445                  0.4  \n",
+       "19                  19521            3886                  0.4  \n",
+       "20                  13769            3364                  0.4  \n",
+       "21                  25134            3276                  0.4  \n",
+       "22                  22104            2686                  0.4  \n",
+       "23                  50438            2066                  0.4  \n",
+       "24                  27380            8381                  0.4  \n",
+       "25                  45344            7025                  0.4  \n",
+       "26                 154499            3458                  0.4  \n",
+       "27                  57123            1258                  0.4  \n",
+       "28                  50285             235                  0.4  \n",
+       "29                   1320             121                  0.4  \n",
+       "..                    ...             ...                  ...  \n",
+       "116                 11392             162                  0.1  \n",
+       "117                  5495             127                  0.1  \n",
+       "118                  1062             124                  0.1  \n",
+       "119                  3687              70                  0.1  \n",
+       "120                  1488              66                  0.1  \n",
+       "121                   950              51                  0.1  \n",
+       "122                   696              36                  0.1  \n",
+       "123                 11937            9363                  0.1  \n",
+       "124                 41158            7705                  0.1  \n",
+       "125                 37292            6506                  0.1  \n",
+       "126                 17957            2435                  0.1  \n",
+       "127                  4110              99                  0.1  \n",
+       "128                 82579            8357                  0.1  \n",
+       "129                  5630             729                  0.1  \n",
+       "130                241807           34390                  0.0  \n",
+       "131                 55895           14998                  0.0  \n",
+       "132                  5444            3412                  0.0  \n",
+       "133                 15004            3054                  0.0  \n",
+       "134                 25214            2603                  0.0  \n",
+       "135                 17691            1651                  0.0  \n",
+       "136                 14873            1333                  0.0  \n",
+       "137                 11206             531                  0.0  \n",
+       "138                 47377             450                  0.0  \n",
+       "139                 12165              50                  0.0  \n",
+       "140                 96252           15749                  0.0  \n",
+       "141                  7367            1348                  0.0  \n",
+       "142                 24116             192                  0.0  \n",
+       "143                 24345             118                  0.0  \n",
+       "144                  1955              59                  0.0  \n",
+       "145                  5289              41                  0.0  \n",
+       "\n",
+       "[146 rows x 22 columns]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "movies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([ 12.,   0.,  27.,   0.,   0.,  41.,   0.,  55.,   0.,  11.]),\n",
+       " array([ 3. ,  3.2,  3.4,  3.6,  3.8,  4. ,  4.2,  4.4,  4.6,  4.8,  5. ]),\n",
+       " <a list of 10 Patch objects>)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": [
+       "iVBORw0KGgoAAAANSUhEUgAAAXMAAAEACAYAAABBDJb9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
+       "AAALEgAACxIB0t1+/AAADrBJREFUeJzt3X+MpVddx/H3p7sgULGbDWa6oRUwuhaMSIkUAqjTSg02\n",
+       "uPYPU+EP2BAkxqAS/yAuRJPlHwH/wV/RGEEyEoI0SJsiEHZZOioxKTZsAVnqinETquyUIMVCJRb7\n",
+       "9Y95lh2ns3PvzL1z7+x33q/kZp/nuefec+6ZM5979tznuZOqQpJ0ebti3g2QJE3OMJekBgxzSWrA\n",
+       "MJekBgxzSWrAMJekBsYK8yQHknwwyReTnEnywiQHk5xMcjbJiSQHdrqxkqSNjTsz/wPgo1X1bOC5\n",
+       "wP3AMeBkVR0GTg37kqQ5yKiLhpJcBZyuqh9cd/x+4KeraiXJ1cByVV23c02VJF3KODPzZwFfTfKe\n",
+       "JJ9J8udJrgQWqmplKLMCLOxYKyVJmxonzPcDzwf+pKqeD3yLdUsqtTq993sBJGlO9o9R5gHggar6\n",
+       "x2H/g8CbgfNJrq6q80kOAQ+uf2ASA16StqGqspXyI8N8COsvJzlcVWeBlwFfGG5HgXcM/945jQZ1\n",
+       "leR4VR2fdzt2A/viIvviIvviou1MhMeZmQP8OvC+JE8E/hV4LbAPuD3J64BzwG1brVySNB1jhXlV\n",
+       "fRZ4wQZ3vWy6zZEkbYdXgM7O8rwbsIssz7sBu8jyvBuwiyzPuwGXs5HnmU/05Em5Zi5JW7Od7HRm\n",
+       "LkkNGOaS1IBhLkkNGOaS1IBhLkkNGOaS1IBhLkkNGOaS1IBhLkkNGOaS1IBhLkkNGOaS1MC432cu\n",
+       "aROz/qtafoGd1jPMpamZVZ6b43o8l1kkqQHDXJIaMMwlqQHDXJIaMMwlqQHDXJIaMMwlqQHDXJIa\n",
+       "MMwlqQHDXJIaMMwlqQHDXJIaGOuLtpKcA/4L+F/g0aq6IclB4APAM4BzwG1V9dAOtVOStIlxZ+YF\n",
+       "LFbV9VV1w3DsGHCyqg4Dp4Z9SdIcbGWZZf33bh4BlobtJeDWqbRIkrRlW5mZfyLJvUlePxxbqKqV\n",
+       "YXsFWJh66yRJYxn3j1O8pKq+kuT7gZNJ7l97Z1XVrP/SiiTporHCvKq+Mvz71SR3ADcAK0murqrz\n",
+       "SQ4BD2702CTH1+wuV9XyZE2WpF6SLAKLEz1H1eYT6iRPAfZV1cNJrgROAG8FXgZ8rarekeQYcKCq\n",
+       "jq17bPm3CrUXrP7PdHZ/Ns7fq962k53jzMwXgDuSXCj/vqo6keRe4PYkr2M4NXGL7ZUkTcnImflE\n",
+       "T+7MXHuEM3NN03ay0ytAJakBw1ySGjDMJakBw1ySGjDMJakBw1ySGjDMJakBw1ySGjDMJakBw1yS\n",
+       "GjDMJakBw1ySGjDMJakBw1ySGjDMJakBw1ySGjDMJakBw1ySGjDMJakBw1ySGjDMJakBw1ySGjDM\n",
+       "JakBw1ySGjDMJakBw1ySGjDMJakBw1ySGjDMJamB/eMUSrIPuBd4oKp+PslB4APAM4BzwG1V9dCO\n",
+       "tVKtJKlZ1VVVmVVd0jyNOzN/I3AGuPBLeAw4WVWHgVPDvrQFNYObtHeMDPMk1wC3AO8CLsxyjgBL\n",
+       "w/YScOuOtE6SNJZxZubvBN4EPLbm2EJVrQzbK8DCtBsmSRrfpmvmSV4BPFhVp5MsblSmqmqzNdAk\n",
+       "x9fsLlfV8jbaKUltDfm6ONFzVF16bTHJ7wKvBr4DPAn4PuBDwAuAxao6n+QQcHdVXbfB48sPoLTe\n",
+       "6pv/LNa0M7MPQGf3mmCWr0vzsZ3s3HSZpareUlXXVtWzgFcCn6yqVwN3AUeHYkeBO7fTYEnSdGz1\n",
+       "PPMLU4+3AzcnOQvcNOxLkuZk02WWiZ/cZRZtwGWWiWtzmaW5qS+zSJIuD4a5JDVgmEtSA4a5JDVg\n",
+       "mEtSA4a5JDVgmEtSA4a5JDVgmEtSA4a5JDVgmEtSA4a5JDVgmEtSA4a5JDVgmEtSA4a5JDVgmEtS\n",
+       "A4a5JDVgmEtSA4a5JDVgmEtSA4a5JDVgmEtSA4a5JDVgmEtSA4a5JDVgmEtSA4a5JDWwaZgneVKS\n",
+       "e5Lcl+RMkrcNxw8mOZnkbJITSQ7MprmSpI1sGuZV9W3gxqp6HvBc4MYkLwWOASer6jBwatiXJM3J\n",
+       "yGWWqnpk2HwisA/4OnAEWBqOLwG37kjrJEljGRnmSa5Ich+wAtxdVV8AFqpqZSiyAizsYBslSSPs\n",
+       "H1Wgqh4DnpfkKuDjSW5cd38lqUs9PsnxNbvLVbW8zbZKUktJFoHFiZ6j6pI5vFGFvwP8N/DLwGJV\n",
+       "nU9yiNUZ+3UblK+qyiQNVD+rb/7jj7sJamJW4292rwlm+bo0H9vJzlFnszztwpkqSZ4M3AycBu4C\n",
+       "jg7FjgJ3br25kqRpGbXMcghYSnIFq8H/3qo6leQ0cHuS1wHngNt2tpmSpM1saZlly0/uMos24DLL\n",
+       "xLW5zNLc1JdZJEmXB8NckhowzCWpAcNckhowzCWpAcNckhoYeTm//r/NvrpgJ3gKmqRxGObbMrvz\n",
+       "iSVpHC6zSFIDhrkkNWCYS1IDhrkkNWCYS1IDhrkkNWCYS1IDhrkkNWCYS1IDhrkkNWCYS1IDhrkk\n",
+       "NWCYS1IDhrkkNWCYS1IDhrkkNWCYS1IDhrkkNWCYS1IDhrkkNTAyzJNcm+TuJF9I8k9JfmM4fjDJ\n",
+       "ySRnk5xIcmDnmytJ2sg4M/NHgd+sqh8FXgS8IcmzgWPAyao6DJwa9iVJczAyzKvqfFXdN2x/E/gi\n",
+       "8HTgCLA0FFsCbt2pRkqSNrelNfMkzwSuB+4BFqpqZbhrBViYasskSWPbP27BJN8L/DXwxqp6OMl3\n",
+       "76uqSlKXeNzxNbvLVbW8vaZKUk9JFoHFiZ6jasMMXl/RE4C/AT5WVb8/HLsfWKyq80kOAXdX1XXr\n",
+       "HldVlcc/4+Vr9U1rdJ9NqTa69R/Msg9n13+OC03TdrJznLNZArwbOHMhyAd3AUeH7aPAnVupWJI0\n",
+       "PSNn5kleCvwd8DkuTj3eDHwauB34AeAccFtVPbTusc7MJ6ut5QzMmfnEtbUcF7poO9k51jLLdhnm\n",
+       "E9fW8pfWMJ+4tpbjQhftyDKLJGn3M8wlqQHDXJIaMMwlqQHDXJIaMMwlqQHDXJIaMMwlqQHDXJIa\n",
+       "MMwlqQHDXJIaMMwlqQHDXJIaMMwlqQHDXJIaMMwlqQHDXJIaMMwlqQHDXJIaMMwlqQHDXJIaMMwl\n",
+       "qQHDXJIaMMwlqYH9O11Bkl/a6ToGH6qqR2dUlyTtKqmqnXvypOAVD+9YBd/1kSuhrqqqb+50Tauv\n",
+       "aef6bF1tVFVmVNnMzK4PZ9d/jgtNU5La6s94BmE+iwH+PY/C/xw0zC8PhvnEtbUcF7poO2Humrkk\n",
+       "NTAyzJP8RZKVJJ9fc+xgkpNJziY5keTAzjZTkrSZcWbm7wFevu7YMeBkVR0GTg37kppJUrO8zfv1\n",
+       "Xs5GhnlV/T3w9XWHjwBLw/YScOuU2yVp16gZ3TSJ7a6ZL1TVyrC9AixMqT2SpG2Y+APQWj0dxrdV\n",
+       "SZqj7V40tJLk6qo6n+QQ8OClix5fs7043CRJFyRZZMJwHOs88yTPBD5cVT827P8e8LWqekeSY8CB\n",
+       "qnrch6CeZz5xbS3PJ/Y884lr83U1tyPnmSd5P/APwI8k+XKS1wJvB25Ocha4adiXJM2JV4BukTOV\n",
+       "yTkzn7g2X1dzXgEqSXuUYS5JDRjmktSAYS5JDRjmktSAYS5JDRjmktSAYS5JDRjmktSAYS5JDRjm\n",
+       "ktSAYS5JDRjmktSAYS5JDRjmktSAYS5JDRjmktSAYS5JDRjmktSAYS5JDRjmktSAYS5JDRjmktSA\n",
+       "YS5JDRjmktSAYS5JDRjmktSAYS5JDUwU5klenuT+JP+S5Lem1ShJ0tZsO8yT7AP+GHg58BzgVUme\n",
+       "Pa2G9bM87wZoV1qedwPUxCQz8xuAL1XVuap6FPgr4Bem06yOlufdAO1Ky/NugJqYJMyfDnx5zf4D\n",
+       "wzFJ0oztn+CxNV6xm74xQR1jevT7dr4OSdq9JgnzfweuXbN/Lauz83XuvmqCOrbi4SQzqmq79bx1\n",
+       "6zUlY75pXm5m87Oabf91HRez+r3qPN53Xqq213dJ9gP/DPwM8B/Ap4FXVdUXp9c8SdI4tj0zr6rv\n",
+       "JPk14OPAPuDdBrkkzce2Z+aSpN1j4itAkzwpyT1J7ktyJsnbLlHuD4eLiz6b5PpJ692NxumLJItJ\n",
+       "vpHk9HD77Xm0dRaS7Bte44cvcX/7MXHBZn2xx8bEuSSfG17npy9RZk+Mi1F9sdVxMckHoABU1beT\n",
+       "3FhVjwzr6J9K8tKq+tSaRt0C/FBV/XCSFwJ/Crxo0rp3m3H6YvC3VXVkHm2csTcCZ4Cnrr9jr4yJ\n",
+       "NS7ZF4O9MiYKWKyq/9zozj02Ljbti8HY42Iq381SVY8Mm09kdf18feOOAEtD2XuAA0kWplH3bjNG\n",
+       "X8AsTw+YkyTXALcA72Lj17tnxsQYfcEmxzva7LXumXExGPVzH3tcTCXMk1yR5D5gBbi7qs6sK7LR\n",
+       "BUbXTKPu3WaMvijgxcN/IT+a5Dmzb+VMvBN4E/DYJe7fM2OC0X2xV8YErL7WTyS5N8nrN7h/L42L\n",
+       "UX2xpXExrZn5Y1X1PFY7/aeSLG5QbP07TMtPXsfoi88A11bVjwN/BNw54ybuuCSvAB6sqtNsPrNo\n",
+       "PybG7Iv2Y2KNl1TV9cDPAW9I8pMblGk/Lgaj+mJL42KqX4FbVd8APgL8xLq71l9gdM1wrK1L9UVV\n",
+       "PXxhKaaqPgY8IcnBOTRxJ70YOJLk34D3Azcl+ct1ZfbKmBjZF3tkTABQVV8Z/v0qcAer3/G01l4Z\n",
+       "FyP7YqvjYhpnszwtyYFh+8nAzcDpdcXuAl4zlHkR8FBVrUxa924zTl8kWchwqWqSG1g9PXSzD0Au\n",
+       "O1X1lqq6tqqeBbwS+GRVvWZdsT0xJsbpi70wJgCSPCXJU4ftK4GfBT6/rtieGBfj9MVWx8XEZ7MA\n",
+       "h4ClJFew+ubw3qo6leRXAKrqz6rqo0luSfIl4FvAa6dQ7240si+AXwR+Ncl3gEdY/QXvrgD26JhY\n",
+       "73F9wd4ZEwvAHUM+7QfeV1Un9ui4GNkXbHFceNGQJDXgn42TpAYMc0lqwDCXpAYMc0lqwDCXpAYM\n",
+       "c0lqwDCXpAYMc0lq4P8APY+Dg8NpCwIAAAAASUVORK5CYII=\n"
+      ],
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x106923198>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "\n",
+    "plt.hist(movies[\"Fandango_Stars\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([  1.,   2.,  20.,  14.,   0.,  22.,  27.,  20.,  25.,  15.]),\n",
+       " array([ 0.5,  0.9,  1.3,  1.7,  2.1,  2.5,  2.9,  3.3,  3.7,  4.1,  4.5]),\n",
+       " <a list of 10 Patch objects>)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": [
+       "iVBORw0KGgoAAAANSUhEUgAAAXMAAAEACAYAAABBDJb9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
+       "AAALEgAACxIB0t1+/AAADzlJREFUeJzt3W+sZPVdx/H3B3ZJi0RXgl1W2Lp9YFM0jeAfbFqU1WpC\n",
+       "GoOYGBRjSkxjGqOlqdFAibHb+KDaxNqoiTEpbda2wTQlULDUgrgT8AHUKttu+WMlKQkoe6nljyAm\n",
+       "pfD1wT1brrdz78ydP+fs/ub9SiZ75syZ+X33e8/9zLm/e8+ZVBWSpFPbaUMXIEman2EuSQ0wzCWp\n",
+       "AYa5JDXAMJekBhjmktSAbcM8yauS3JfkaJIHk3ygW392kjuTfDXJHUn29FOuJGmcTPo78yRnVtUL\n",
+       "SXYB/wT8HnA58F9V9cEk1wLfW1XXLb9cSdI4E6dZquqFbvEM4HTgadbD/HC3/jBwxVKqkyRNZWKY\n",
+       "JzktyVFgDThSVQ8Ae6tqrdtkDdi7xBolSRPsmrRBVb0MXJjke4DPJ/mZTY9XEq8JIEkDmhjmJ1TV\n",
+       "s0k+C/wYsJbk3Ko6nmQf8OTm7Q14SZpNVWWWJ215A84B9nTLrwbuBt4KfBC4tlt/HfDHY55b2732\n",
+       "UDfg0NA1WJM1rWJd1jR1TTXL8yYdme8DDic5jfX59Y9X1V1J7gc+leQdwKPAlTt+F5EkLcy2YV5V\n",
+       "x4AfHbP+KeDnllWUJGlnVvEM0NHQBYwxGrqAMUZDFzDGaOgCxhgNXcAWRkMXMMZo6ALGGA1dwKJM\n",
+       "PGlo5hdOqmaZxJekFTZrdq7ikbkkNccwl6QGGOaS1ADDXJIaYJhLUgMMc0lqgGEuSQ0wzCWpAYa5\n",
+       "JDXAMJekBhjmktQAw1ySGmCYS1IDDHNJaoBhLkkNMMwlqQGTPgNU0kkoyXI+VWYKfujMyckwl05Z\n",
+       "Q+S5OX6ycppFkhpgmEtSAwxzSWqAYS5JDTDMJakBhrkkNcAwl6QGGOaS1IBtwzzJ/iRHkjyQ5CtJ\n",
+       "runWH0ryeJL7u9tl/ZQrSRonVVufRZbkXODcqjqa5CzgX4ArgCuB56rqQ9s8tzztV1qO9dP5hzkD\n",
+       "1O/r5Zo1O7c9nb+qjgPHu+XnkzwEnHdizB1XKUlaiqnnzJMcAC4C7u1WvSvJl5LckGTPEmqTJE1p\n",
+       "qjDvplg+Dby7qp4H/gp4HXAh8ATwp0urUJI00cSrJibZDdwEfKKqbgGoqic3PP4R4LYtnntow91R\n",
+       "VY3mKVbSamr5kr9JDgIH536dCb8ADXAY+EZVvWfD+n1V9US3/B7gJ6rq1zY911+ASkuyar8AXaX/\n",
+       "76zZOSnMLwHuBr7MK528HriK9SmWAr4GvLOq1hZRkKTJVincYLX+v0sJ83kY5tLyrFK4wWr9f2fN\n",
+       "Ts8AlaQGGOaS1ADDXJIaYJhLUgMMc0lqgGEuSQ0wzCWpAYa5JDXAMJekBhjmktQAw1ySGmCYS1ID\n",
+       "DHNJaoBhLkkNMMwlqQGGuSQ1wDCXpAYY5pLUgF1DFyCdyob81HhpI8Ncmtswn00pbeQ0iyQ1wDCX\n",
+       "pAYY5pLUAMNckhpgmEtSAwxzSWqAYS5JDTDMJakBhrkkNWDbME+yP8mRJA8k+UqSa7r1Zye5M8lX\n",
+       "k9yRZE8/5UqSxpl0ZP4i8J6q+mHgTcBvJ7kAuA64s6peD9zV3ZckDWTbMK+q41V1tFt+HngIOA+4\n",
+       "HDjcbXYYuGKZRUqStjf1nHmSA8BFwH3A3qpa6x5aA/YuvDJJ0tSmumpikrOAm4B3V9VzyStXbKuq\n",
+       "2uoyoEkObbg7qqrR7KVqp4a8PGtVeVm/RnnZ38VKchA4OPfrVG3/dUmyG/g74HNV9eFu3cPAwao6\n",
+       "nmQfcKSq3rDpeeU39LDWv+mGuTzrqnzth+yx4/Yzbt/78qzZOemvWQLcADx4Isg7twJXd8tXA7fs\n",
+       "dGBJ0uJse2Se5BLgbuDLvPK2+F7gC8CngNcCjwJXVtUzm57rkfnAPDJfPo/M2x/3VDkynzjNMivD\n",
+       "fHiG+fIZ5u2Pe6qEuWeASlIDDHNJaoBhLkkNMMwlqQGGuSQ1wDCXpAYY5pLUAMNckhpgmEtSAwxz\n",
+       "SWqAYS5JDTDMJakBhrkkNcAwl6QGGOaS1ADDXJIaYJhLUgMMc0lqgGEuSQ0wzCWpAYa5JDXAMJek\n",
+       "BhjmktQAw1ySGmCYS1IDDHNJaoBhLkkNMMwlqQETwzzJR5OsJTm2Yd2hJI8nub+7XbbcMiVJ25nm\n",
+       "yPxjwOawLuBDVXVRd/v7xZcmSZrWxDCvqnuAp8c8lMWXI0maxTxz5u9K8qUkNyTZs7CKJEk7lqqa\n",
+       "vFFyALitqt7Y3X8N8PXu4T8C9lXVOzY9p4D3b1g1qqrR/CVrWutfg8lf3yWMTFWtxE9uQ/bYcfsa\n",
+       "t3+zfP/smnGgJ08sJ/kIcNsW2x2a5fUl6eTR95vIbG8gM02zJNm34e4vAce22laStHwTj8yT3Ahc\n",
+       "CpyT5DHgfcDBJBey/pb1NeCdS61SkrStqebMZ3rhpFZl3vRk5Zz58jln7rjLGHOW7x/PAJWkBhjm\n",
+       "ktQAw1ySGjDTnyZKk3TnGfRuVebqpc0Mcy3J6pzgIZ0MnGaRpAYY5pLUAMNckhpgmEtSAwxzSWqA\n",
+       "YS5JDTDMJakBhrkkNcAwl6QGGOaS1ADDXJIaYJhLUgMMc0lqgGEuSQ0wzCWpAYa5JDXAMJekBhjm\n",
+       "ktQAw1ySGmCYS1IDDHNJaoBhLkkNMMwlqQETwzzJR5OsJTm2Yd3ZSe5M8tUkdyTZs9wyJUnbmebI\n",
+       "/GPAZZvWXQfcWVWvB+7q7kuSBjIxzKvqHuDpTasvBw53y4eBKxZclyRpB2adM99bVWvd8hqwd0H1\n",
+       "SJJmsGveF6iqSlLjHktyaMPdUVWN5h1Pktoy6m7zSdXYHP7/GyUHgNuq6o3d/YeBg1V1PMk+4EhV\n",
+       "vWHTc6qqMneFmtn6m+zkr+8SRmaocfve51axx467/DFn2Y9nnWa5Fbi6W74auGXG15EkLcDEI/Mk\n",
+       "NwKXAuewPj/+h8BngE8BrwUeBa6sqmc2Pc8j84Gt4lGjR+aOe+qPO9t+PNU0yywM8+GtYtAY5o57\n",
+       "6o/b7zSLJOkkYphLUgMMc0lqgGEuSQ0wzCWpAYa5JDXAMJekBhjmktQAw1ySGmCYS1IDDHNJaoBh\n",
+       "LkkNMMwlqQGGuSQ1wDCXpAYY5pLUAMNckhpgmEtSAwxzSWqAYS5JDTDMJakBhrkkNcAwl6QGGOaS\n",
+       "1ADDXJIaYJhLUgMMc0lqgGEuSQ3YNc+TkzwK/DfwEvBiVV28iKIkSTszV5gDBRysqqcWUYwkaTaL\n",
+       "mGbJAl5DkjSHecO8gH9I8sUkv7mIgiRJOzfvNMtbquqJJN8H3Jnk4aq6ZxGFSZKmN1eYV9UT3b9f\n",
+       "T3IzcDHw7TBPcmjD5qOqGs0zniS1Z9Td5pOqmu2JyZnA6VX1XJLvAu4A3l9Vd3SPV1U5nz6gJLU+\n",
+       "E9b7yAw1bt/73Cr22HGXP+Ys+/E8R+Z7gZuTnHidT54IcklSv2Y+Mp/4wh6ZD24Vjxo9MnfcU3/c\n",
+       "2fZjzwCVpAYY5pLUAMNckhpgmEtSAwxzSWqAYS5JDTDMJakBhrkkNcAwl6QGGOaS1ADDXJIaYJhL\n",
+       "UgMMc0lqgGEuSQ0wzCWpAYa5JDXAMJekBhjmktSAeT4DVFNa/2gxSVoew7w3Q31+oaRV4DSLJDXA\n",
+       "MJekBhjmktQAw1ySGrBSvwBN8gPA7qHrkKRFW6kwhzNH8OrXwO6X+xvz5cCT/Q0naSWtWJjv3g13\n",
+       "nwk/1OOY3wDO6XE8SavIOXNJasDMYZ7ksiQPJ/n3JNcusihJ0s7MFOZJTgf+EriM9TmLq5JcsMjC\n",
+       "lucLQxcwxmjoAsYYDV3AGKOhCziFjIYuYIzR0AWMMRq6gIWZ9cj8YuCRqnq0ql4E/hb4xcWVtUz/\n",
+       "PHQBY4yGLmCM0dAFjDEauoBTyGjoAsYYDV3AGKOhC1iYWcP8POCxDfcf79ZJkgYw61+znKJXAXzp\n",
+       "Jbj5m/DQ//Y35jcDfHd/40laRanaeS4neRNwqKou6+6/F3i5qv5kwzanaOBL0rCqaseXPJ01zHcB\n",
+       "/wa8FfhP1n+reFVVPbTjF5MkzW2maZaq+laS3wE+D5wO3GCQS9JwZjoylySdXOY+A3Sak4eS/Hn3\n",
+       "+JeSXDTvmPPWlORgkmeT3N/d/qCHmj6aZC3JsW226btP29Y0UJ/2JzmS5IEkX0lyzRbb9daraWrq\n",
+       "u1dJXpXkviRHkzyY5ANbbNf3PjWxriH2q27c07vxbtvi8V57NammHfepqma+sT7F8ghwgPWrER4F\n",
+       "Lti0zduA27vlnwTunWfMBdV0ELh1mXWMqeungIuAY1s83mufpqxpiD6dC1zYLZ/F+u9mht6npqlp\n",
+       "iF6d2f27C7gXuGTofWrKunrvVTfu7wKfHDf2gL3arqYd9WneI/NpTh66HDgMUFX3AXuS7J1z3Hlr\n",
+       "gp4/ILOq7gGe3maTvvs0TU3Qf5+OV9XRbvl54CHg+zdt1muvpqwJ+u/VC93iGawfxDy1aZPe96kp\n",
+       "64Kee5XkfNYD+yNbjN17r6aoiW3Wf4d5w3yak4fGbXP+nOPOW1MBb+5+nLo9SZ+XUdxK332axqB9\n",
+       "SnKA9Z8c7tv00GC92qam3nuV5LQkR4E14EhVPbhpk0H6NEVdQ+xXfwb8PrDV5a+H6NWkmnbUp3nD\n",
+       "fNrfnm5+d1nmb12nee1/BfZX1Y8AfwHcssR6dqLPPk1jsD4lOQv4NPDu7mj4OzbZdH/pvZpQU++9\n",
+       "qqqXq+pC1kPnp5McHLNZ732aoq5ee5XkF4Anq+p+tj/S7a1XU9a0oz7NG+b/AezfcH8/6+9o221z\n",
+       "frduWSbWVFXPnfhRsKo+B+xOcvYSa5pG332aaKg+JdkN3AR8oqrG7cC992pSTUPuU1X1LPBZ4Mc3\n",
+       "PTToPrVVXQP06s3A5Um+BtwI/GySv9m0Td+9mljTTvs0b5h/EfjBJAeSnAH8CnDrpm1uBd4O3z5z\n",
+       "9JmqWptz3LlqSrI3Sbrli1n/E81x83p96rtPEw3Rp268G4AHq+rDW2zWa6+mqanvXiU5J8mebvnV\n",
+       "wM8D92/arPd9apq6+u5VVV1fVfur6nXArwL/WFVv37RZr72apqad9mmuTxqqLU4eSvLO7vG/rqrb\n",
+       "k7wtySPA/wC/Mc+Yi6gJ+GXgt5J8C3iB9WYuVZIbgUuBc5I8BryP7vNIh+jTNDUxQJ+AtwC/Dnw5\n",
+       "yYkQuB547Ym6BujVxJrov1f7gMNJTmP9oOzjVXXXkN9709bFMPvVRgVwEvRq25rYYZ88aUiSGuDH\n",
+       "xklSAwxzSWqAYS5JDTDMJakBhrkkNcAwl6QGGOaS1ADDXJIa8H89RktAzw7+GQAAAABJRU5ErkJg\n",
+       "gg==\n"
+      ],
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x1075143c8>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.hist(movies[\"Metacritic_norm_round\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fandango vs Metacritic Scores\n",
+    "\n",
+    "There are no scores below a `3.0` in the Fandango reviews.  The Fandango reviews also tend to center around `4.5` and `4.0`, whereas the Metacritic reviews seem to center around `3.0` and `3.5`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "4.08904109589\n",
+      "2.97260273973\n",
+      "0.540385977979\n",
+      "0.990960561374\n",
+      "4.0\n",
+      "3.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy\n",
+    "\n",
+    "# Pull each rating column once so every statistic reads from the same Series.\n",
+    "fandango_stars = movies[\"Fandango_Stars\"]\n",
+    "metacritic_stars = movies[\"Metacritic_norm_round\"]\n",
+    "\n",
+    "f_mean, m_mean = fandango_stars.mean(), metacritic_stars.mean()\n",
+    "f_std, m_std = fandango_stars.std(), metacritic_stars.std()\n",
+    "f_median, m_median = fandango_stars.median(), metacritic_stars.median()\n",
+    "\n",
+    "# Printed in the same order as before: means, standard deviations, medians.\n",
+    "for statistic in (f_mean, m_mean, f_std, m_std, f_median, m_median):\n",
+    "    print(statistic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fandango vs Metacritic Methodology\n",
+    "\n",
+    "Fandango appears to inflate ratings and isn't transparent about how it calculates and aggregates ratings.  Metacritic publishes each individual critic rating, and is transparent about how they aggregate them to get a final rating."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fandango vs Metacritic number differences\n",
+    "\n",
+    "The median Metacritic score is higher than the mean Metacritic score because a few very low reviews \"drag down\" the mean.  The median Fandango score is lower than the mean Fandango score because a few very high ratings \"drag up\" the mean.\n",
+    "\n",
+    "Fandango ratings appear clustered between `3` and `5`, and have a much narrower range than Metacritic reviews, which go from `0` to `5`.\n",
+    "\n",
+    "Fandango ratings in general appear to be higher than metacritic ratings.\n",
+    "\n",
+    "These may be due to movie studio influence on Fandango ratings, and the fact that Fandango calculates its ratings in a hidden way."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<matplotlib.collections.PathCollection at 0x1079dc7b8>"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": [
+       "iVBORw0KGgoAAAANSUhEUgAAAXIAAAEACAYAAACuzv3DAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
+       "AAALEgAACxIB0t1+/AAAEuBJREFUeJzt3X+MZWV9x/HPh13NgnXgXjBry26jCUtSGhq2a1YCWIZa\n",
+       "GdhY2iYk0tCq/GEJscEINk0ITbcJgUSjNESjm9rOqiW2tQba1W1m2OogtGEV9wdtVzo0hYQlVNRF\n",
+       "BGfNss63f5yz2bN378/ZmfOcZ+77lTzhnnueufeTc8987pln5i6OCAEA8nVW6gAAgDNDkQNA5ihy\n",
+       "AMgcRQ4AmaPIASBzFDkAZG7tMJNsPyfpJ5J+Lun1iNjasX9S0j9J+t/yrq9GxD3LFxMA0MtQRS4p\n",
+       "JE1GxJE+cx6NiBuWIRMAYASjLK34DPcDAFbAsEUekvbYftL2h3rsv8L2Qdu7bV+yfBEBAP0Mu7Ry\n",
+       "ZUS8aPstkh6x/XREPFbZv0/SxohYsH29pIclXbzcYQEAp/Oo/9aK7T+X9FpEfLLPnGclbamuqdvm\n",
+       "H3UBgCWIiL5L1wOvyG2fI2lNRLxq+02SrpX0Fx1z1kt6KSLC9lYVbxCn/WJ0UJhxYXt7RGxPnaMJ\n",
+       "OBYncSxO4licNMxF8DBLK+slPWT7xPwHI2LW9q2SFBE7JN0o6TbbxyUtSLppyakBACMZWOQR8ayk\n",
+       "y7rcv6Ny+zOSPrO80QAAw+CTnWnMpQ7QIHOpAzTIXOoADTKXOkBORv5l55KfyA7WyAFgNMN0J1fk\n",
+       "AJA5ihwAMkeRA0DmKHIAyBxFDgCZo8gBIHMUOQBkjiIHgMxR5ACQOYocADJHkQNA5ihyAMgcRQ4A\n",
+       "maPIASBzFDkAZI4iB4DMUeQAkDmKHAAyR5EDQOYocgDIHEUOAJmjyAEgcxQ5AGSOIgeAzFHkAJA5\n",
+       "ihwAMjdUkdt+zvZTtvfb/naPOQ/Yfsb2QdublzcmAKCXtUPOC0mTEXGk207b2yRdFBGbbL9T0mcl\n",
+       "Xb5MGQEAfYyytOI++26Q9AVJioi9ks6zvf5Mgq1Gtqfs82eL4anUedAMtqft9rFieDpRhnm7HcXw\n",
+       "fIoMTZHj9+koV+R7bP9c0o6I+KuO/RdKer6yfVjSBknfP/OIq0NxQkw8JH3q7OKe26+y/XsRMZM2\n",
+       "GVIqinvig9L95T23f9C2IuKWGjPMSxObKhk22Z6PiIvrytAUuX6fDlvkV0bEi7bfIukR209HxGMd\n",
+       "czqv2OPM460m7TuLk+MDJ+44W7rjTkmNPkGw0lo3FwX6gcp9H71ZUm1FLrU2dcmwqb7nb5I8v0+H\n",
+       "KvKIeLH87w9sPyRpq6Rqkb8gaWNle0N53ylsb69szkXE3Ih5AWBVsz0paXKkL4qIvkPSOZLeXN5+\n",
+       "k6R/k3Rtx5xtknaXty+X9ESXx4lBz7Wah6QpaWJB2hnFmFiQNJU6FyP5eTEtTUTlvAhJ0zVnmO+S\n",
+       "YT71sUn0ejTu+3SY7nQ5sd+7w9slPVRurpX0YETcZ/vW8hl2lPM+Lek6ST+VdEtE7Ot4nIiIfr8w\n",
+       "XfWK9bf2ncXWkU9Gw9fdUI9inbx1c7H18oNR4/p4JcN8scQiSS8/E2O4Pn5C075Ph+nOgUVeZxgA\n",
+       "wKmG6U4+2QkAmaPIASBzFDkAZI4iB4DMUeQAkDmKHAAyR5EDQOYocgDIHEUOAJmjyAEgcxQ5AGSO\n",
+       "IgeAzFHkAJA5ihwAMkeRA0DmKHIAyBxFDgCZo8gBIHMUOQBkjiIHgMxR5ACQOYocADJHkQNA5ihy\n",
+       "AMgcRQ4AmaPIASBzFDkAZI4iB4DMUeQAkLmhitz2Gtv7be/qsm/S9ivl/v22717+mACAXoa9Iv+I\n",
+       "pEOSosf+RyNicznuWZ5oWAm2n7TbUQw/Oc45bP+4kuHHKTKUOY5WchxNlOFwJcPhFBnKHK9Xcrye\n",
+       "KMOM3V4shmdSZBjVwCK3vUHSNkmfl+Re05YzFFZGUZgTW6T7VYyJLSlKtAk5iuKeOLeS4dwUZV4U\n",
+       "98S6So51dZd5UdwTF1YyXJiizIvinlhbybG27jIvinviWul+F2Pi2izKPCL6DklfkbRZ0tWSdnXZ\n",
+       "f7WkH0k6KGm3pEt6PE4Mei7Gyg6pFdLOkKIcO0Nq1f66NCFHEzI0JUcTMjQlh9Ra7JJhse5jcWom\n",
+       "DTwGa/uVvO33SnopIvbbnuwxbZ+kjRGxYPt6SQ9LurjH422vbM5FxFy/5weAcVN27eRIX1M2fq8H\n",
+       "vFfSH0o6LmmdpAlJX42I9/f5mmclbYmIIx33R0SwBJPQySWNB8p7bpf0k+9GxDvGLcfJpZVTMrwS\n",
+       "EefVlaHMUS6tnJLjZxFxdo0ZyqWVUzK8EBEb6spQ5iiXVk7JcTwi3lBjhnJp5ZQMsxExVVeGLpkG\n",
+       "dmffIu94sKslfSwifrvj/vUqrtrD9lZJ/xARb1tKGKy8okRbW4qtl2sv8SblKMq8dW6ZofYSr+Q4\n",
+       "KrXWlTlqLfFKhsNS68IyQ+0lXsnxutQqVwperrXEKxlmpNZ7ygyPpCzxMs/A7uy7tNJFlA98qyRF\n",
+       "xA5JN0q6zfZxSQuSblpCVtQkVXF3akKOVMXdKUVxd8mQpLg7pSjuLhmSFvdSDH1FfsZPxBU5AIxs\n",
+       "mO7kk50AkDmKHAAyR5EDQOYocgDIHEUOAJmjyAEgcxQ5AGSOIgeAzFHkAJA5ihwAMkeRA0DmKHIA\n",
+       "yBxFDgCZo8gBIHMUOQBkjiIHgMxR5ACQOYocADJHkQNA5ihyAMgcRQ4AmaPIASBzFDkAZI4iB4DM\n",
+       "UeQAkDmKHAAyR5EDQOYocgDI3FBFbnuN7f22d/XY/4DtZ2wftL15eSMCAPoZ9or8I5IOSYrOHba3\n",
+       "SbooIjZJ+iNJn12+eKuL7Sn7/NlieCpRhnm7HcXwfIoMZY5pu32sGJ5OlOFw5VgcTpGhzHG0kuNo\n",
+       "ogzJX48yx4zdXiyGZxJlaMT3yEgiou+QtEHSHknXSNrVZf/nJL2vsv20pPVd5sWg51rNQ9KUNLEg\n",
+       "7YxiTCxImqo5w7w0EZUMIWk+wbGY7pJjuuYMh7tkOJzgWBztkuPouL0eZY6ZLjlmas7QiO+Rjkwx\n",
+       "cM4QD/IVSZslXd2jyHdJuqKyvUfSlqWEWc1Das8WJ0aUY2dI7dl6M7Ti9Ayt2l8XqXWsS45jY3os\n",
+       "kudowutR5ljskmNx3F6P0zNp4POv7Xe1bvu9kl6KiP22J/tN7bzQ7/F42yubcxEx1+/5AWDclF07\n",
+       "OdIXDXgnuFfS85KelfSipJ9K+mLHnM9JuqmyzdJK92PJ0srJHMl/lBdLK416PcocLK10zxQD54zw\n",
+       "YL2WVrZJ2l3evlzSE0sNs9pHUebt2WLUW+KVDPPFj4+tpCdoUR6tY8WovzTKDIcrx6L2Eq/kOFrJ\n",
+       "UWuJN+n1KHPMFEssrcW6S7ySoRHfI5U8MWiOy4kD2b5a0p0RcYPtW8tH31Hu+7Sk68or9lsiYl+X\n",
+       "r4+I6FyCAQD0MUx3Dl3kdYQBAJxqmO7kk50AkDmKHAAyR5EDQOYocgDIHEUOAJmjyAEgcxQ5AGSO\n",
+       "IgeAzFHkAJA5ihwAMkeRA0DmKHIAyBxFDgCZo8gBIHMUOQBkjiIHgMxR5ACQOYocADJHkQNA5ihy\n",
+       "AMgcRQ4AmaPIASBzFDkAZI4iB4DMUeQAkDmKHAAyR5EDQOYocgDI3MAit73O9l7bB2wfsn1flzmT\n",
+       "tl+xvb8cd69MXABAp4FFHhE/k3RNRFwm6dckXWP7qi5TH42IzeW4Z7mDngnbU/b5s8XwVOo8Kdme\n",
+       "t9tRDM8nzDFtt48Vw9OJMszY7cVieCZFhjJH8vOzQccieY4mZBhZRAw9JJ0j6TuSLum4f1LSrgFf\n",
+       "G6M813INSVPSxIK0M4oxsSBpKkWW1EPSvDQRlWMRkuYT5JjukmO65gwzXTLMJDgWyc/PBh2L5Dma\n",
+       "kKFLphg4Z8gHOkvSAUmvSvp4l/1XS/qRpIOSdncW/bBhVuYgtGeLFyTKsTOk9mzKFybdCdGK049F\n",
+       "q/bXRWod65LjWM0ZFrtkWKz/WKQ/P5tzLNLnaEKG0zMpBs1ZO+RV+6Kky2yfK2nG9mREzFWm7JO0\n",
+       "MSIWbF8v6WFJF3c+ju3tlc25jscAgLFne1LFKsfwlvDu8GeSPjZgzrOS2qO+q6zQu1nyH12bMsTS\n",
+       "SjVDI36EbsL52aBjkTxHEzJ0yRQD5wzxIBdIOq+8fbakb0l6d8ec9ZJc3t4q6bmlhFnBAzFV/Ajb\n",
+       "nh3XEq8ci/liiaWVpMQrOaaLJZbWsbpLvJJhpvhRurWY8pu1Cedng45F8hxNyNCRJwbNOVG+Pdm+\n",
+       "VNIXVKyTnyXpSxHxCdu3ls+ww/aHJd0m6bikBUl3RMQTHY8TEeG+TwYAOMUw3TmwyOsMAwA41TDd\n",
+       "ySc7ASBzFDkAZI4iB4DMUeQAkDmKHAAyR5EDQOYocgDIHEUOAJmjyAEgcxQ5AGSOIgeAzFHkAJA5\n",
+       "ihwAMkeRA0DmKHIAyBxFDgCZo8gBIHMUOQBkjiIHgMxR5ACQOYocADJHkQNA5ihyAMgcRQ4AmaPI\n",
+       "ASBzFDkAZI4iB4DMUeQAkLm+RW57ne29tg/YPmT7vh7zHrD9jO2DtjevTFQAQDdr++2MiJ/ZviYi\n",
+       "FmyvlfS47asi4vETc2xvk3RRRGyy/U5Jn5V0+crGBgCcMHBpJSIWyptvlLRG0pGOKTdI+kI5d6+k\n",
+       "82yvX86QWD62p+32sWJ4epxz2L7LPv+HxfBdKTKUOabs82eL4alUOZqgCedFliKi71BR9gckvSrp\n",
+       "413275J0RWV7j6QtXebFoOdirOyQNC1NhLSzHBMhaXocc0i6q0uGuxIciylpYqGSY0HSVOpzJcVo\n",
+       "wnnRxDFMd47yYOdKekLSZMf9uyRdWdneI+nXlxKGsdInROtY8Q0S5dgZUuvYOOaQ2j88PUP7h/Uf\n",
+       "i/Zslxyzqc+VFKMJ50UTxzDd2XeNvOPK/RXbX5f0DklzlV0vSNpY2d5Q3nca29srm3MRMddtHgCM\n",
+       "K9uTkiZH+qIB7wQXSDqvvH22pG9JenfHnG2Sdpe3L5f0xFLfVRgr/s7eiB9dm5BDLK00bjThvGji\n",
+       "GKY7XU7s9c5wqYpfZJ5Vji9FxCds31o++o5y3qclXSfpp5JuiYh9XR4rIsIjvctg2RW/QGrdXGy9\n",
+       "/GBE3DKuOYpfcLbvKLaOfCoi7q07Q5ljSmrfWeb4ZETMpMjRBE04L5pmmO7sW+R1hwEAnGqY7uST\n",
+       "nQCQOYocADJHkQNA5ihyAMgcRQ4AmaPIASBzFDkAZI4iB4DMUeQAkDmKHAAyR5EDQOYocgDIHEUO\n",
+       "AJmjyAEgcxQ5AGSOIgeAzFHkAJA5ihwAMkeRA0DmKHIAyBxFDgCZo8gBIHMUOQBkjiIHgMxR5ACQ\n",
+       "OYocADJHkQNA5ihyAMjcwCK3vdH2N23/l+3/tH17lzmTtl+xvb8cd69MXABAp2GuyF+X9NGI+FVJ\n",
+       "l0v6sO1f6TLv0YjYXI57ljXlKmN7MnWGpuBYnMSxOIljMZqBRR4R/xcRB8rbr0n6nqRf6jLVy5xt\n",
+       "NZtMHaBBJlMHaJDJ1AEaZDJ1gJyMtEZu+22SNkva27ErJF1h+6Dt3bYvWZ54AIBB1g470fYvSPpH\n",
+       "SR8pr8yr9knaGBELtq+X9LCki5cvJgCgF0fE4En2GyR9TdK/RMRfDjH/WUlbIuJI5b7BTwQAOE1E\n",
+       "9F26HnhFbtuS/lrSoV4lbnu9pJciImxvVfEGcaQ6Z1AQAMDSDLO0cqWkP5D0lO395X13SfplSYqI\n",
+       "HZJulHSb7eOSFiTdtAJZAQBdDLW0AgBorlo+2Wn7OttP237G9p/W8ZxNZPtvbH/f9n+kzpLaMB80\n",
+       "Gxe219nea/uA7UO270udKSXba8oPFu5KnSUl28/Zfqo8Ft/uO3elr8htr5H035J+S9ILkr4j6fcj\n",
+       "4nsr+sQNZPtdkl6T9MWIuDR1npRsv1XSWyPiQPkXUd+V9LvjeF5Iku1zyr/6WivpcUkfi4jHU+dK\n",
+       "wfYdkrZIenNE3JA6Tyrd/miklzquyLdK+p+IeC4iXpf0d5J+p4bnbZyIeEzSy6lzNMEIHzQbCxGx\n",
+       "UN58o6Q1kgZ+865GtjdI2ibp8+JDhtKQx6COIr9Q0vOV7cPlfYCkvh80Gxu2z7J9QNL3JX0zIg6l\n",
+       "zpTI/ZL+RNJi6iANEJL22H7S9of6TayjyPltKnoa8EGzsRERixFxmaQNkn5jHP+tEdvvVfFnzPvF\n",
+       "1bgkXRkRmyVdr+LfuHpXr4l1FPkLkjZWtjequCrHmCs/aPZVSX8bEQ+nztMEEfGKpK9LekfqLAlc\n",
+       "IemGcm34y5J+0/YXE2dKJiJeLP/7A0kPqVim7qqOIn9S0ibbb7P9Rknvk/TPNTwvGmyYD5qNC9sX\n",
+       "2D6vvH22pPdI2t//q1afiLgrIjZGxNtVfBblGxHx/tS5UrB9ju03l7ffJOlaST3/2m3Fizwijkv6\n",
+       "Y0kzkg5J+vsx/suEL0v6d0kX237e9i2pMyV04oNm11T+HfvrUodK5BclfaNcI98raVdE/GviTE0w\n",
+       "zsuy6yU9VjknvhYRs70m84EgAMgc/6s3AMgcRQ4AmaPIASBzFDkAZI4iB4DMUeQAkDmKHAAyR5ED\n",
+       "QOb+H+ILH0XCPZKpAAAAAElFTkSuQmCC\n"
+      ],
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x1079642e8>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.scatter(movies[\"Metacritic_norm_round\"], movies[\"Fandango_Stars\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Absolute gap between the two star ratings, one value per movie.\n",
+    "rating_gap = movies[\"Metacritic_norm_round\"] - movies[\"Fandango_Stars\"]\n",
+    "movies[\"fm_diff\"] = rating_gap.abs()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>FILM</th>\n",
+       "      <th>RottenTomatoes</th>\n",
+       "      <th>RottenTomatoes_User</th>\n",
+       "      <th>Metacritic</th>\n",
+       "      <th>Metacritic_User</th>\n",
+       "      <th>IMDB</th>\n",
+       "      <th>Fandango_Stars</th>\n",
+       "      <th>Fandango_Ratingvalue</th>\n",
+       "      <th>RT_norm</th>\n",
+       "      <th>RT_user_norm</th>\n",
+       "      <th>...</th>\n",
+       "      <th>RT_norm_round</th>\n",
+       "      <th>RT_user_norm_round</th>\n",
+       "      <th>Metacritic_norm_round</th>\n",
+       "      <th>Metacritic_user_norm_round</th>\n",
+       "      <th>IMDB_norm_round</th>\n",
+       "      <th>Metacritic_user_vote_count</th>\n",
+       "      <th>IMDB_user_vote_count</th>\n",
+       "      <th>Fandango_votes</th>\n",
+       "      <th>Fandango_Difference</th>\n",
+       "      <th>fm_diff</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Do You Believe? (2015)</td>\n",
+       "      <td>18</td>\n",
+       "      <td>84</td>\n",
+       "      <td>22</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>5.4</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>0.90</td>\n",
+       "      <td>4.20</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>31</td>\n",
+       "      <td>3136</td>\n",
+       "      <td>1793</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>85</th>\n",
+       "      <td>Little Boy (2015)</td>\n",
+       "      <td>20</td>\n",
+       "      <td>81</td>\n",
+       "      <td>30</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>7.4</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.3</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>4.05</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>38</td>\n",
+       "      <td>5927</td>\n",
+       "      <td>811</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>47</th>\n",
+       "      <td>Annie (2014)</td>\n",
+       "      <td>27</td>\n",
+       "      <td>61</td>\n",
+       "      <td>33</td>\n",
+       "      <td>4.8</td>\n",
+       "      <td>5.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.2</td>\n",
+       "      <td>1.35</td>\n",
+       "      <td>3.05</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>108</td>\n",
+       "      <td>19222</td>\n",
+       "      <td>6835</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>Pixels (2015)</td>\n",
+       "      <td>17</td>\n",
+       "      <td>54</td>\n",
+       "      <td>27</td>\n",
+       "      <td>5.3</td>\n",
+       "      <td>5.6</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>0.85</td>\n",
+       "      <td>2.70</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>246</td>\n",
+       "      <td>19521</td>\n",
+       "      <td>3886</td>\n",
+       "      <td>0.4</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>134</th>\n",
+       "      <td>The Longest Ride (2015)</td>\n",
+       "      <td>31</td>\n",
+       "      <td>73</td>\n",
+       "      <td>33</td>\n",
+       "      <td>4.8</td>\n",
+       "      <td>7.2</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>4.5</td>\n",
+       "      <td>1.55</td>\n",
+       "      <td>3.65</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>49</td>\n",
+       "      <td>25214</td>\n",
+       "      <td>2603</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 23 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        FILM  RottenTomatoes  RottenTomatoes_User  Metacritic  \\\n",
+       "3     Do You Believe? (2015)              18                   84          22   \n",
+       "85         Little Boy (2015)              20                   81          30   \n",
+       "47              Annie (2014)              27                   61          33   \n",
+       "19             Pixels (2015)              17                   54          27   \n",
+       "134  The Longest Ride (2015)              31                   73          33   \n",
+       "\n",
+       "     Metacritic_User  IMDB  Fandango_Stars  Fandango_Ratingvalue  RT_norm  \\\n",
+       "3                4.7   5.4             5.0                   4.5     0.90   \n",
+       "85               5.9   7.4             4.5                   4.3     1.00   \n",
+       "47               4.8   5.2             4.5                   4.2     1.35   \n",
+       "19               5.3   5.6             4.5                   4.1     0.85   \n",
+       "134              4.8   7.2             4.5                   4.5     1.55   \n",
+       "\n",
+       "     RT_user_norm   ...     RT_norm_round  RT_user_norm_round  \\\n",
+       "3            4.20   ...               1.0                 4.0   \n",
+       "85           4.05   ...               1.0                 4.0   \n",
+       "47           3.05   ...               1.5                 3.0   \n",
+       "19           2.70   ...               1.0                 2.5   \n",
+       "134          3.65   ...               1.5                 3.5   \n",
+       "\n",
+       "     Metacritic_norm_round  Metacritic_user_norm_round  IMDB_norm_round  \\\n",
+       "3                      1.0                         2.5              2.5   \n",
+       "85                     1.5                         3.0              3.5   \n",
+       "47                     1.5                         2.5              2.5   \n",
+       "19                     1.5                         2.5              3.0   \n",
+       "134                    1.5                         2.5              3.5   \n",
+       "\n",
+       "     Metacritic_user_vote_count  IMDB_user_vote_count  Fandango_votes  \\\n",
+       "3                            31                  3136            1793   \n",
+       "85                           38                  5927             811   \n",
+       "47                          108                 19222            6835   \n",
+       "19                          246                 19521            3886   \n",
+       "134                          49                 25214            2603   \n",
+       "\n",
+       "     Fandango_Difference  fm_diff  \n",
+       "3                    0.5        4  \n",
+       "85                   0.2        3  \n",
+       "47                   0.3        3  \n",
+       "19                   0.4        3  \n",
+       "134                  0.0        3  \n",
+       "\n",
+       "[5 rows x 23 columns]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Show the five movies with the largest Fandango/Metacritic gap.\n",
+    "# DataFrame.sort was removed from pandas; sort_values is the supported spelling.\n",
+    "movies.sort_values(\"fm_diff\", ascending=False).head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.17844919073895918"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from scipy.stats import pearsonr\n",
+    "\n",
+    "r_value, p_value = pearsonr(movies[\"Fandango_Stars\"], movies[\"Metacritic_norm_round\"])\n",
+    "\n",
+    "r_value"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fandango and Metacritic correlation\n",
+    "\n",
+    "The low correlation between Fandango and Metacritic scores indicates that Fandango scores aren't just inflated, they are fundamentally different.  For whatever reason, it appears that Fandango both inflates scores overall, and inflates scores differently depending on the movie."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.17844919073895915"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from scipy.stats import linregress\n",
+    "\n",
+    "# Fit Fandango_Stars (y) as a linear function of Metacritic_norm_round (x).\n",
+    "slope, intercept, r_value, p_value, stderr_slope = linregress(movies[\"Metacritic_norm_round\"], movies[\"Fandango_Stars\"])\n",
+    "\n",
+    "# Display the fit's correlation coefficient, matching the cell's recorded output.\n",
+    "r_value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4.0917071528212032"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pred = 3 * slope + intercept\n",
+    "\n",
+    "pred"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

+ 2476 - 0
Mission210Solution.ipynb

@@ -0,0 +1,2476 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Show Number</th>\n",
+       "      <th>Air Date</th>\n",
+       "      <th>Round</th>\n",
+       "      <th>Category</th>\n",
+       "      <th>Value</th>\n",
+       "      <th>Question</th>\n",
+       "      <th>Answer</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>For the last 8 years of his life, Galileo was ...</td>\n",
+       "      <td>Copernicus</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>No. 2: 1912 Olympian; football star at Carlisl...</td>\n",
+       "      <td>Jim Thorpe</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EVERYBODY TALKS ABOUT IT...</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>The city of Yuma in this state has a record av...</td>\n",
+       "      <td>Arizona</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>In 1963, live on \"The Art Linkletter Show\", th...</td>\n",
+       "      <td>McDonald's</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>Signer of the Dec. of Indep., framer of the Co...</td>\n",
+       "      <td>John Adams</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>In the title of an Aesop fable, this insect sh...</td>\n",
+       "      <td>the ant</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>Built in 312 B.C. to link Rome &amp; the South of ...</td>\n",
+       "      <td>the Appian Way</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>No. 8: 30 steals for the Birmingham Barons; 2,...</td>\n",
+       "      <td>Michael Jordan</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EVERYBODY TALKS ABOUT IT...</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>In the winter of 1971-72, a record 1,122 inche...</td>\n",
+       "      <td>Washington</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>This housewares store was named for the packag...</td>\n",
+       "      <td>Crate &amp; Barrel</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>\"And away we go\"</td>\n",
+       "      <td>Jackie Gleason</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>Cows regurgitate this from the first stomach t...</td>\n",
+       "      <td>the cud</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>In 1000 Rajaraja I of the Cholas battled to ta...</td>\n",
+       "      <td>Ceylon (or Sri Lanka)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>No. 1: Lettered in hoops, football &amp; lacrosse ...</td>\n",
+       "      <td>Jim Brown</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EVERYBODY TALKS ABOUT IT...</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>On June 28, 1994 the nat'l weather service beg...</td>\n",
+       "      <td>the UV index</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>This company's Accutron watch, introduced in 1...</td>\n",
+       "      <td>Bulova</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>Outlaw: \"Murdered by a traitor and a coward wh...</td>\n",
+       "      <td>Jesse James</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>A small demon, or a mischievous child (who mig...</td>\n",
+       "      <td>imp</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>Karl led the first of these Marxist organizati...</td>\n",
+       "      <td>the International</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>No. 10: FB/LB for Columbia U. in the 1920s; MV...</td>\n",
+       "      <td>(Lou) Gehrig</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EVERYBODY TALKS ABOUT IT...</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>Africa's lowest temperature was 11 degrees bel...</td>\n",
+       "      <td>Morocco</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>Edward Teller &amp; this man partnered in 1898 to ...</td>\n",
+       "      <td>(Paul) Bonwit</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$2,000</td>\n",
+       "      <td>1939 Oscar winner: \"...you are a credit to you...</td>\n",
+       "      <td>Hattie McDaniel (for her role in Gone with the...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>In geologic time one of these, shorter than an...</td>\n",
+       "      <td>era</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>This Asian political party was founded in 1885...</td>\n",
+       "      <td>the Congress Party</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>No. 5: Only center to lead the NBA in assists;...</td>\n",
+       "      <td>(Wilt) Chamberlain</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>The Kirschner brothers, Don &amp; Bill, named this...</td>\n",
+       "      <td>K2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>Revolutionary War hero: \"His spirit is in Verm...</td>\n",
+       "      <td>Ethan Allen</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>A single layer of paper, or to perform one's c...</td>\n",
+       "      <td>ply</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>DR. SEUSS AT THE MULTIPLEX</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>&lt;a href=\"http://www.j-archive.com/media/2004-1...</td>\n",
+       "      <td>Horton</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19969</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>AMERICAN HISTORY</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>In 1960 the last of these locomotives was reti...</td>\n",
+       "      <td>steam engines</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19970</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>MIND YOUR SHAKESPEARE \"P\"s &amp; \"Q\"s</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>Kate: \"if I be waspish, best beware my sting\";...</td>\n",
+       "      <td>Petruchio</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19971</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ALMA MATERS</td>\n",
+       "      <td>$1,500</td>\n",
+       "      <td>This private college in Northern California bo...</td>\n",
+       "      <td>Stanford University</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19972</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ACTRESSES</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>She voiced Princess Pea in \"The Tale of Desper...</td>\n",
+       "      <td>Emma Watson</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19973</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>2-LETTER WORDS</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>It's the name of the long-awaited new White Ho...</td>\n",
+       "      <td>Bo</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19974</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ANGELS &amp; DEMONS</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>Langdon in \"Angels &amp; Demons\" is looking for &lt;a...</td>\n",
+       "      <td>an antimatter bomb</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19975</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>AMERICAN HISTORY</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>In the 1600s most of New York State was occupi...</td>\n",
+       "      <td>the Iroquois</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19976</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>MIND YOUR SHAKESPEARE \"P\"s &amp; \"Q\"s</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>Marina's dad (need a hint? he rules Tyre)</td>\n",
+       "      <td>Pericles</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19977</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ALMA MATERS</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>Presidential kids are welcome at this New Orle...</td>\n",
+       "      <td>Tulane</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19978</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ACTRESSES</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>She didn't vamp it up &amp; did a bella job as Em ...</td>\n",
+       "      <td>Kristen Stewart</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19979</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>2-LETTER WORDS</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>Third syllable intoned by the giant who smells...</td>\n",
+       "      <td>fo</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19980</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ANGELS &amp; DEMONS</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>Much of \"Angels &amp; Demons\" takes place at one o...</td>\n",
+       "      <td>a conclave</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19981</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>AMERICAN HISTORY</td>\n",
+       "      <td>$1,200</td>\n",
+       "      <td>In 1899 Secretary of State John Hay proclaimed...</td>\n",
+       "      <td>open-door policy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19982</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>MIND YOUR SHAKESPEARE \"P\"s &amp; \"Q\"s</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>Fruity surname of Peter in \"A Midsummer Night'...</td>\n",
+       "      <td>Quince</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19983</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ALMA MATERS</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>Quincy Jones, Kevin Eubanks &amp; Branford Marsali...</td>\n",
+       "      <td>Berklee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19984</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ACTRESSES</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>In 2009 she returned to being \"Fast &amp; Furious\"...</td>\n",
+       "      <td>Michelle Rodriguez</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19985</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>2-LETTER WORDS</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>The book of Genesis says this ancient city \"of...</td>\n",
+       "      <td>Ur</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19986</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ANGELS &amp; DEMONS</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>\"Habakkuk and the Angel\" is one of a series of...</td>\n",
+       "      <td>Bernini</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19987</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Final Jeopardy!</td>\n",
+       "      <td>SCIENCE TERMS</td>\n",
+       "      <td>None</td>\n",
+       "      <td>In medieval England, it meant the smallest uni...</td>\n",
+       "      <td>atom</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19988</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>U.S. GEOGRAPHY</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>This Texas city is the largest in the U.S. to ...</td>\n",
+       "      <td>Houston (Lee Brown)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19989</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>POP MUSIC PAIRINGS</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>...&amp; the Crickets</td>\n",
+       "      <td>Buddy Holly</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19990</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORIC PEOPLE</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>In the 990s this son of Erik the Red brought C...</td>\n",
+       "      <td>Leif Ericson</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19991</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>1998 QUOTATIONS</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>Concerning a failed Windows 98 demonstration, ...</td>\n",
+       "      <td>Bill Gates</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19992</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>LLAMA-RAMA</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>This llama product is used to make hats, blank...</td>\n",
+       "      <td>Wool</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19993</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>DING DONG</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>In 1967 this company introduced its chocolate-...</td>\n",
+       "      <td>Hostess</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19994</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>U.S. GEOGRAPHY</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>Of 8, 12 or 18, the number of U.S. states that...</td>\n",
+       "      <td>18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19995</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>POP MUSIC PAIRINGS</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>...&amp; the New Power Generation</td>\n",
+       "      <td>Prince</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19996</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORIC PEOPLE</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>In 1589 he was appointed professor of mathemat...</td>\n",
+       "      <td>Galileo</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19997</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>1998 QUOTATIONS</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>Before the grand jury she said, \"I'm really so...</td>\n",
+       "      <td>Monica Lewinsky</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19998</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>LLAMA-RAMA</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>Llamas are the heftiest South American members...</td>\n",
+       "      <td>Camels</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>19999 rows × 7 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Show Number    Air Date             Round  \\\n",
+       "0             4680  2004-12-31         Jeopardy!   \n",
+       "1             4680  2004-12-31         Jeopardy!   \n",
+       "2             4680  2004-12-31         Jeopardy!   \n",
+       "3             4680  2004-12-31         Jeopardy!   \n",
+       "4             4680  2004-12-31         Jeopardy!   \n",
+       "5             4680  2004-12-31         Jeopardy!   \n",
+       "6             4680  2004-12-31         Jeopardy!   \n",
+       "7             4680  2004-12-31         Jeopardy!   \n",
+       "8             4680  2004-12-31         Jeopardy!   \n",
+       "9             4680  2004-12-31         Jeopardy!   \n",
+       "10            4680  2004-12-31         Jeopardy!   \n",
+       "11            4680  2004-12-31         Jeopardy!   \n",
+       "12            4680  2004-12-31         Jeopardy!   \n",
+       "13            4680  2004-12-31         Jeopardy!   \n",
+       "14            4680  2004-12-31         Jeopardy!   \n",
+       "15            4680  2004-12-31         Jeopardy!   \n",
+       "16            4680  2004-12-31         Jeopardy!   \n",
+       "17            4680  2004-12-31         Jeopardy!   \n",
+       "18            4680  2004-12-31         Jeopardy!   \n",
+       "19            4680  2004-12-31         Jeopardy!   \n",
+       "20            4680  2004-12-31         Jeopardy!   \n",
+       "21            4680  2004-12-31         Jeopardy!   \n",
+       "22            4680  2004-12-31         Jeopardy!   \n",
+       "23            4680  2004-12-31         Jeopardy!   \n",
+       "24            4680  2004-12-31         Jeopardy!   \n",
+       "25            4680  2004-12-31         Jeopardy!   \n",
+       "26            4680  2004-12-31         Jeopardy!   \n",
+       "27            4680  2004-12-31         Jeopardy!   \n",
+       "28            4680  2004-12-31         Jeopardy!   \n",
+       "29            4680  2004-12-31  Double Jeopardy!   \n",
+       "...            ...         ...               ...   \n",
+       "19969         5694  2009-05-14  Double Jeopardy!   \n",
+       "19970         5694  2009-05-14  Double Jeopardy!   \n",
+       "19971         5694  2009-05-14  Double Jeopardy!   \n",
+       "19972         5694  2009-05-14  Double Jeopardy!   \n",
+       "19973         5694  2009-05-14  Double Jeopardy!   \n",
+       "19974         5694  2009-05-14  Double Jeopardy!   \n",
+       "19975         5694  2009-05-14  Double Jeopardy!   \n",
+       "19976         5694  2009-05-14  Double Jeopardy!   \n",
+       "19977         5694  2009-05-14  Double Jeopardy!   \n",
+       "19978         5694  2009-05-14  Double Jeopardy!   \n",
+       "19979         5694  2009-05-14  Double Jeopardy!   \n",
+       "19980         5694  2009-05-14  Double Jeopardy!   \n",
+       "19981         5694  2009-05-14  Double Jeopardy!   \n",
+       "19982         5694  2009-05-14  Double Jeopardy!   \n",
+       "19983         5694  2009-05-14  Double Jeopardy!   \n",
+       "19984         5694  2009-05-14  Double Jeopardy!   \n",
+       "19985         5694  2009-05-14  Double Jeopardy!   \n",
+       "19986         5694  2009-05-14  Double Jeopardy!   \n",
+       "19987         5694  2009-05-14   Final Jeopardy!   \n",
+       "19988         3582  2000-03-14         Jeopardy!   \n",
+       "19989         3582  2000-03-14         Jeopardy!   \n",
+       "19990         3582  2000-03-14         Jeopardy!   \n",
+       "19991         3582  2000-03-14         Jeopardy!   \n",
+       "19992         3582  2000-03-14         Jeopardy!   \n",
+       "19993         3582  2000-03-14         Jeopardy!   \n",
+       "19994         3582  2000-03-14         Jeopardy!   \n",
+       "19995         3582  2000-03-14         Jeopardy!   \n",
+       "19996         3582  2000-03-14         Jeopardy!   \n",
+       "19997         3582  2000-03-14         Jeopardy!   \n",
+       "19998         3582  2000-03-14         Jeopardy!   \n",
+       "\n",
+       "                                Category   Value  \\\n",
+       "0                                HISTORY    $200   \n",
+       "1        ESPN's TOP 10 ALL-TIME ATHLETES    $200   \n",
+       "2            EVERYBODY TALKS ABOUT IT...    $200   \n",
+       "3                       THE COMPANY LINE    $200   \n",
+       "4                    EPITAPHS & TRIBUTES    $200   \n",
+       "5                         3-LETTER WORDS    $200   \n",
+       "6                                HISTORY    $400   \n",
+       "7        ESPN's TOP 10 ALL-TIME ATHLETES    $400   \n",
+       "8            EVERYBODY TALKS ABOUT IT...    $400   \n",
+       "9                       THE COMPANY LINE    $400   \n",
+       "10                   EPITAPHS & TRIBUTES    $400   \n",
+       "11                        3-LETTER WORDS    $400   \n",
+       "12                               HISTORY    $600   \n",
+       "13       ESPN's TOP 10 ALL-TIME ATHLETES    $600   \n",
+       "14           EVERYBODY TALKS ABOUT IT...    $600   \n",
+       "15                      THE COMPANY LINE    $600   \n",
+       "16                   EPITAPHS & TRIBUTES    $600   \n",
+       "17                        3-LETTER WORDS    $600   \n",
+       "18                               HISTORY    $800   \n",
+       "19       ESPN's TOP 10 ALL-TIME ATHLETES    $800   \n",
+       "20           EVERYBODY TALKS ABOUT IT...    $800   \n",
+       "21                      THE COMPANY LINE    $800   \n",
+       "22                   EPITAPHS & TRIBUTES  $2,000   \n",
+       "23                        3-LETTER WORDS    $800   \n",
+       "24                               HISTORY   $1000   \n",
+       "25       ESPN's TOP 10 ALL-TIME ATHLETES   $1000   \n",
+       "26                      THE COMPANY LINE   $1000   \n",
+       "27                   EPITAPHS & TRIBUTES   $1000   \n",
+       "28                        3-LETTER WORDS   $1000   \n",
+       "29            DR. SEUSS AT THE MULTIPLEX    $400   \n",
+       "...                                  ...     ...   \n",
+       "19969                   AMERICAN HISTORY   $1200   \n",
+       "19970  MIND YOUR SHAKESPEARE \"P\"s & \"Q\"s   $1200   \n",
+       "19971                        ALMA MATERS  $1,500   \n",
+       "19972                          ACTRESSES   $1200   \n",
+       "19973                     2-LETTER WORDS   $1200   \n",
+       "19974                    ANGELS & DEMONS   $1200   \n",
+       "19975                   AMERICAN HISTORY   $1600   \n",
+       "19976  MIND YOUR SHAKESPEARE \"P\"s & \"Q\"s   $1600   \n",
+       "19977                        ALMA MATERS   $1600   \n",
+       "19978                          ACTRESSES   $1600   \n",
+       "19979                     2-LETTER WORDS   $1600   \n",
+       "19980                    ANGELS & DEMONS   $1600   \n",
+       "19981                   AMERICAN HISTORY  $1,200   \n",
+       "19982  MIND YOUR SHAKESPEARE \"P\"s & \"Q\"s   $2000   \n",
+       "19983                        ALMA MATERS   $2000   \n",
+       "19984                          ACTRESSES   $2000   \n",
+       "19985                     2-LETTER WORDS   $2000   \n",
+       "19986                    ANGELS & DEMONS   $2000   \n",
+       "19987                      SCIENCE TERMS    None   \n",
+       "19988                     U.S. GEOGRAPHY    $100   \n",
+       "19989                 POP MUSIC PAIRINGS    $100   \n",
+       "19990                    HISTORIC PEOPLE    $100   \n",
+       "19991                    1998 QUOTATIONS    $100   \n",
+       "19992                         LLAMA-RAMA    $100   \n",
+       "19993                          DING DONG    $100   \n",
+       "19994                     U.S. GEOGRAPHY    $200   \n",
+       "19995                 POP MUSIC PAIRINGS    $200   \n",
+       "19996                    HISTORIC PEOPLE    $200   \n",
+       "19997                    1998 QUOTATIONS    $200   \n",
+       "19998                         LLAMA-RAMA    $200   \n",
+       "\n",
+       "                                                Question  \\\n",
+       "0      For the last 8 years of his life, Galileo was ...   \n",
+       "1      No. 2: 1912 Olympian; football star at Carlisl...   \n",
+       "2      The city of Yuma in this state has a record av...   \n",
+       "3      In 1963, live on \"The Art Linkletter Show\", th...   \n",
+       "4      Signer of the Dec. of Indep., framer of the Co...   \n",
+       "5      In the title of an Aesop fable, this insect sh...   \n",
+       "6      Built in 312 B.C. to link Rome & the South of ...   \n",
+       "7      No. 8: 30 steals for the Birmingham Barons; 2,...   \n",
+       "8      In the winter of 1971-72, a record 1,122 inche...   \n",
+       "9      This housewares store was named for the packag...   \n",
+       "10                                      \"And away we go\"   \n",
+       "11     Cows regurgitate this from the first stomach t...   \n",
+       "12     In 1000 Rajaraja I of the Cholas battled to ta...   \n",
+       "13     No. 1: Lettered in hoops, football & lacrosse ...   \n",
+       "14     On June 28, 1994 the nat'l weather service beg...   \n",
+       "15     This company's Accutron watch, introduced in 1...   \n",
+       "16     Outlaw: \"Murdered by a traitor and a coward wh...   \n",
+       "17     A small demon, or a mischievous child (who mig...   \n",
+       "18     Karl led the first of these Marxist organizati...   \n",
+       "19     No. 10: FB/LB for Columbia U. in the 1920s; MV...   \n",
+       "20     Africa's lowest temperature was 11 degrees bel...   \n",
+       "21     Edward Teller & this man partnered in 1898 to ...   \n",
+       "22     1939 Oscar winner: \"...you are a credit to you...   \n",
+       "23     In geologic time one of these, shorter than an...   \n",
+       "24     This Asian political party was founded in 1885...   \n",
+       "25     No. 5: Only center to lead the NBA in assists;...   \n",
+       "26     The Kirschner brothers, Don & Bill, named this...   \n",
+       "27     Revolutionary War hero: \"His spirit is in Verm...   \n",
+       "28     A single layer of paper, or to perform one's c...   \n",
+       "29     <a href=\"http://www.j-archive.com/media/2004-1...   \n",
+       "...                                                  ...   \n",
+       "19969  In 1960 the last of these locomotives was reti...   \n",
+       "19970  Kate: \"if I be waspish, best beware my sting\";...   \n",
+       "19971  This private college in Northern California bo...   \n",
+       "19972  She voiced Princess Pea in \"The Tale of Desper...   \n",
+       "19973  It's the name of the long-awaited new White Ho...   \n",
+       "19974  Langdon in \"Angels & Demons\" is looking for <a...   \n",
+       "19975  In the 1600s most of New York State was occupi...   \n",
+       "19976          Marina's dad (need a hint? he rules Tyre)   \n",
+       "19977  Presidential kids are welcome at this New Orle...   \n",
+       "19978  She didn't vamp it up & did a bella job as Em ...   \n",
+       "19979  Third syllable intoned by the giant who smells...   \n",
+       "19980  Much of \"Angels & Demons\" takes place at one o...   \n",
+       "19981  In 1899 Secretary of State John Hay proclaimed...   \n",
+       "19982  Fruity surname of Peter in \"A Midsummer Night'...   \n",
+       "19983  Quincy Jones, Kevin Eubanks & Branford Marsali...   \n",
+       "19984  In 2009 she returned to being \"Fast & Furious\"...   \n",
+       "19985  The book of Genesis says this ancient city \"of...   \n",
+       "19986  \"Habakkuk and the Angel\" is one of a series of...   \n",
+       "19987  In medieval England, it meant the smallest uni...   \n",
+       "19988  This Texas city is the largest in the U.S. to ...   \n",
+       "19989                                  ...& the Crickets   \n",
+       "19990  In the 990s this son of Erik the Red brought C...   \n",
+       "19991  Concerning a failed Windows 98 demonstration, ...   \n",
+       "19992  This llama product is used to make hats, blank...   \n",
+       "19993  In 1967 this company introduced its chocolate-...   \n",
+       "19994  Of 8, 12 or 18, the number of U.S. states that...   \n",
+       "19995                      ...& the New Power Generation   \n",
+       "19996  In 1589 he was appointed professor of mathemat...   \n",
+       "19997  Before the grand jury she said, \"I'm really so...   \n",
+       "19998  Llamas are the heftiest South American members...   \n",
+       "\n",
+       "                                                  Answer  \n",
+       "0                                             Copernicus  \n",
+       "1                                             Jim Thorpe  \n",
+       "2                                                Arizona  \n",
+       "3                                             McDonald's  \n",
+       "4                                             John Adams  \n",
+       "5                                                the ant  \n",
+       "6                                         the Appian Way  \n",
+       "7                                         Michael Jordan  \n",
+       "8                                             Washington  \n",
+       "9                                         Crate & Barrel  \n",
+       "10                                        Jackie Gleason  \n",
+       "11                                               the cud  \n",
+       "12                                 Ceylon (or Sri Lanka)  \n",
+       "13                                             Jim Brown  \n",
+       "14                                          the UV index  \n",
+       "15                                                Bulova  \n",
+       "16                                           Jesse James  \n",
+       "17                                                   imp  \n",
+       "18                                     the International  \n",
+       "19                                          (Lou) Gehrig  \n",
+       "20                                               Morocco  \n",
+       "21                                         (Paul) Bonwit  \n",
+       "22     Hattie McDaniel (for her role in Gone with the...  \n",
+       "23                                                   era  \n",
+       "24                                    the Congress Party  \n",
+       "25                                    (Wilt) Chamberlain  \n",
+       "26                                                    K2  \n",
+       "27                                           Ethan Allen  \n",
+       "28                                                   ply  \n",
+       "29                                                Horton  \n",
+       "...                                                  ...  \n",
+       "19969                                      steam engines  \n",
+       "19970                                          Petruchio  \n",
+       "19971                                Stanford University  \n",
+       "19972                                        Emma Watson  \n",
+       "19973                                                 Bo  \n",
+       "19974                                 an antimatter bomb  \n",
+       "19975                                       the Iroquois  \n",
+       "19976                                           Pericles  \n",
+       "19977                                             Tulane  \n",
+       "19978                                    Kristen Stewart  \n",
+       "19979                                                 fo  \n",
+       "19980                                         a conclave  \n",
+       "19981                                   open-door policy  \n",
+       "19982                                             Quince  \n",
+       "19983                                            Berklee  \n",
+       "19984                                 Michelle Rodriguez  \n",
+       "19985                                                 Ur  \n",
+       "19986                                            Bernini  \n",
+       "19987                                               atom  \n",
+       "19988                                Houston (Lee Brown)  \n",
+       "19989                                        Buddy Holly  \n",
+       "19990                                       Leif Ericson  \n",
+       "19991                                         Bill Gates  \n",
+       "19992                                               Wool  \n",
+       "19993                                            Hostess  \n",
+       "19994                                                 18  \n",
+       "19995                                             Prince  \n",
+       "19996                                            Galileo  \n",
+       "19997                                    Monica Lewinsky  \n",
+       "19998                                             Camels  \n",
+       "\n",
+       "[19999 rows x 7 columns]"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas\n",
+    "import csv\n",
+    "\n",
+    "jeopardy = pandas.read_csv(\"jeopardy.csv\")\n",
+    "\n",
+    "jeopardy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Show Number', ' Air Date', ' Round', ' Category', ' Value',\n",
+       "       ' Question', ' Answer'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "jeopardy.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Most headers in the CSV carry a leading space (e.g. ' Air Date'). Strip the\n",
+    "# padding programmatically instead of hard-coding the full list of names, so the\n",
+    "# cell does not depend on the exact number or order of columns.\n",
+    "jeopardy.columns = jeopardy.columns.str.strip()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "\n",
+    "def normalize_text(text):\n",
+    "    text = text.lower()\n",
+    "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
+    "    return text\n",
+    "\n",
+    "def normalize_values(text):\n",
+    "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
+    "    try:\n",
+    "        text = int(text)\n",
+    "    except Exception:\n",
+    "        text = 0\n",
+    "    return text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Derive cleaned companions of the raw question, answer, and value columns\n",
+    "# by running the normalizers over each element.\n",
+    "jeopardy[\"clean_question\"] = jeopardy[\"Question\"].map(normalize_text)\n",
+    "jeopardy[\"clean_answer\"] = jeopardy[\"Answer\"].map(normalize_text)\n",
+    "jeopardy[\"clean_value\"] = jeopardy[\"Value\"].map(normalize_values)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Show Number</th>\n",
+       "      <th>Air Date</th>\n",
+       "      <th>Round</th>\n",
+       "      <th>Category</th>\n",
+       "      <th>Value</th>\n",
+       "      <th>Question</th>\n",
+       "      <th>Answer</th>\n",
+       "      <th>clean_question</th>\n",
+       "      <th>clean_answer</th>\n",
+       "      <th>clean_value</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>For the last 8 years of his life, Galileo was ...</td>\n",
+       "      <td>Copernicus</td>\n",
+       "      <td>for the last 8 years of his life galileo was u...</td>\n",
+       "      <td>copernicus</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>No. 2: 1912 Olympian; football star at Carlisl...</td>\n",
+       "      <td>Jim Thorpe</td>\n",
+       "      <td>no 2 1912 olympian football star at carlisle i...</td>\n",
+       "      <td>jim thorpe</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EVERYBODY TALKS ABOUT IT...</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>The city of Yuma in this state has a record av...</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>the city of yuma in this state has a record av...</td>\n",
+       "      <td>arizona</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>In 1963, live on \"The Art Linkletter Show\", th...</td>\n",
+       "      <td>McDonald's</td>\n",
+       "      <td>in 1963 live on the art linkletter show this c...</td>\n",
+       "      <td>mcdonalds</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>Signer of the Dec. of Indep., framer of the Co...</td>\n",
+       "      <td>John Adams</td>\n",
+       "      <td>signer of the dec of indep framer of the const...</td>\n",
+       "      <td>john adams</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>In the title of an Aesop fable, this insect sh...</td>\n",
+       "      <td>the ant</td>\n",
+       "      <td>in the title of an aesop fable this insect sha...</td>\n",
+       "      <td>the ant</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>Built in 312 B.C. to link Rome &amp; the South of ...</td>\n",
+       "      <td>the Appian Way</td>\n",
+       "      <td>built in 312 bc to link rome  the south of ita...</td>\n",
+       "      <td>the appian way</td>\n",
+       "      <td>400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>No. 8: 30 steals for the Birmingham Barons; 2,...</td>\n",
+       "      <td>Michael Jordan</td>\n",
+       "      <td>no 8 30 steals for the birmingham barons 2306 ...</td>\n",
+       "      <td>michael jordan</td>\n",
+       "      <td>400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EVERYBODY TALKS ABOUT IT...</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>In the winter of 1971-72, a record 1,122 inche...</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>in the winter of 197172 a record 1122 inches o...</td>\n",
+       "      <td>washington</td>\n",
+       "      <td>400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>This housewares store was named for the packag...</td>\n",
+       "      <td>Crate &amp; Barrel</td>\n",
+       "      <td>this housewares store was named for the packag...</td>\n",
+       "      <td>crate  barrel</td>\n",
+       "      <td>400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>\"And away we go\"</td>\n",
+       "      <td>Jackie Gleason</td>\n",
+       "      <td>and away we go</td>\n",
+       "      <td>jackie gleason</td>\n",
+       "      <td>400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>Cows regurgitate this from the first stomach t...</td>\n",
+       "      <td>the cud</td>\n",
+       "      <td>cows regurgitate this from the first stomach t...</td>\n",
+       "      <td>the cud</td>\n",
+       "      <td>400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>In 1000 Rajaraja I of the Cholas battled to ta...</td>\n",
+       "      <td>Ceylon (or Sri Lanka)</td>\n",
+       "      <td>in 1000 rajaraja i of the cholas battled to ta...</td>\n",
+       "      <td>ceylon or sri lanka</td>\n",
+       "      <td>600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>No. 1: Lettered in hoops, football &amp; lacrosse ...</td>\n",
+       "      <td>Jim Brown</td>\n",
+       "      <td>no 1 lettered in hoops football  lacrosse at s...</td>\n",
+       "      <td>jim brown</td>\n",
+       "      <td>600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EVERYBODY TALKS ABOUT IT...</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>On June 28, 1994 the nat'l weather service beg...</td>\n",
+       "      <td>the UV index</td>\n",
+       "      <td>on june 28 1994 the natl weather service began...</td>\n",
+       "      <td>the uv index</td>\n",
+       "      <td>600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>This company's Accutron watch, introduced in 1...</td>\n",
+       "      <td>Bulova</td>\n",
+       "      <td>this companys accutron watch introduced in 196...</td>\n",
+       "      <td>bulova</td>\n",
+       "      <td>600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>Outlaw: \"Murdered by a traitor and a coward wh...</td>\n",
+       "      <td>Jesse James</td>\n",
+       "      <td>outlaw murdered by a traitor and a coward whos...</td>\n",
+       "      <td>jesse james</td>\n",
+       "      <td>600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$600</td>\n",
+       "      <td>A small demon, or a mischievous child (who mig...</td>\n",
+       "      <td>imp</td>\n",
+       "      <td>a small demon or a mischievous child who might...</td>\n",
+       "      <td>imp</td>\n",
+       "      <td>600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>Karl led the first of these Marxist organizati...</td>\n",
+       "      <td>the International</td>\n",
+       "      <td>karl led the first of these marxist organizati...</td>\n",
+       "      <td>the international</td>\n",
+       "      <td>800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>No. 10: FB/LB for Columbia U. in the 1920s; MV...</td>\n",
+       "      <td>(Lou) Gehrig</td>\n",
+       "      <td>no 10 fblb for columbia u in the 1920s mvp for...</td>\n",
+       "      <td>lou gehrig</td>\n",
+       "      <td>800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EVERYBODY TALKS ABOUT IT...</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>Africa's lowest temperature was 11 degrees bel...</td>\n",
+       "      <td>Morocco</td>\n",
+       "      <td>africas lowest temperature was 11 degrees belo...</td>\n",
+       "      <td>morocco</td>\n",
+       "      <td>800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>Edward Teller &amp; this man partnered in 1898 to ...</td>\n",
+       "      <td>(Paul) Bonwit</td>\n",
+       "      <td>edward teller  this man partnered in 1898 to s...</td>\n",
+       "      <td>paul bonwit</td>\n",
+       "      <td>800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$2,000</td>\n",
+       "      <td>1939 Oscar winner: \"...you are a credit to you...</td>\n",
+       "      <td>Hattie McDaniel (for her role in Gone with the...</td>\n",
+       "      <td>1939 oscar winner you are a credit to your cra...</td>\n",
+       "      <td>hattie mcdaniel for her role in gone with the ...</td>\n",
+       "      <td>2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$800</td>\n",
+       "      <td>In geologic time one of these, shorter than an...</td>\n",
+       "      <td>era</td>\n",
+       "      <td>in geologic time one of these shorter than an ...</td>\n",
+       "      <td>era</td>\n",
+       "      <td>800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORY</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>This Asian political party was founded in 1885...</td>\n",
+       "      <td>the Congress Party</td>\n",
+       "      <td>this asian political party was founded in 1885...</td>\n",
+       "      <td>the congress party</td>\n",
+       "      <td>1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>ESPN's TOP 10 ALL-TIME ATHLETES</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>No. 5: Only center to lead the NBA in assists;...</td>\n",
+       "      <td>(Wilt) Chamberlain</td>\n",
+       "      <td>no 5 only center to lead the nba in assists tr...</td>\n",
+       "      <td>wilt chamberlain</td>\n",
+       "      <td>1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>THE COMPANY LINE</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>The Kirschner brothers, Don &amp; Bill, named this...</td>\n",
+       "      <td>K2</td>\n",
+       "      <td>the kirschner brothers don  bill named this sk...</td>\n",
+       "      <td>k2</td>\n",
+       "      <td>1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>EPITAPHS &amp; TRIBUTES</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>Revolutionary War hero: \"His spirit is in Verm...</td>\n",
+       "      <td>Ethan Allen</td>\n",
+       "      <td>revolutionary war hero his spirit is in vermon...</td>\n",
+       "      <td>ethan allen</td>\n",
+       "      <td>1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>3-LETTER WORDS</td>\n",
+       "      <td>$1000</td>\n",
+       "      <td>A single layer of paper, or to perform one's c...</td>\n",
+       "      <td>ply</td>\n",
+       "      <td>a single layer of paper or to perform ones cra...</td>\n",
+       "      <td>ply</td>\n",
+       "      <td>1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>4680</td>\n",
+       "      <td>2004-12-31</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>DR. SEUSS AT THE MULTIPLEX</td>\n",
+       "      <td>$400</td>\n",
+       "      <td>&lt;a href=\"http://www.j-archive.com/media/2004-1...</td>\n",
+       "      <td>Horton</td>\n",
+       "      <td>a hrefhttpwwwjarchivecommedia20041231dj23mp3be...</td>\n",
+       "      <td>horton</td>\n",
+       "      <td>400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19969</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>AMERICAN HISTORY</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>In 1960 the last of these locomotives was reti...</td>\n",
+       "      <td>steam engines</td>\n",
+       "      <td>in 1960 the last of these locomotives was reti...</td>\n",
+       "      <td>steam engines</td>\n",
+       "      <td>1200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19970</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>MIND YOUR SHAKESPEARE \"P\"s &amp; \"Q\"s</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>Kate: \"if I be waspish, best beware my sting\";...</td>\n",
+       "      <td>Petruchio</td>\n",
+       "      <td>kate if i be waspish best beware my sting his ...</td>\n",
+       "      <td>petruchio</td>\n",
+       "      <td>1200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19971</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ALMA MATERS</td>\n",
+       "      <td>$1,500</td>\n",
+       "      <td>This private college in Northern California bo...</td>\n",
+       "      <td>Stanford University</td>\n",
+       "      <td>this private college in northern california bo...</td>\n",
+       "      <td>stanford university</td>\n",
+       "      <td>1500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19972</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ACTRESSES</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>She voiced Princess Pea in \"The Tale of Desper...</td>\n",
+       "      <td>Emma Watson</td>\n",
+       "      <td>she voiced princess pea in the tale of despere...</td>\n",
+       "      <td>emma watson</td>\n",
+       "      <td>1200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19973</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>2-LETTER WORDS</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>It's the name of the long-awaited new White Ho...</td>\n",
+       "      <td>Bo</td>\n",
+       "      <td>its the name of the longawaited new white hous...</td>\n",
+       "      <td>bo</td>\n",
+       "      <td>1200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19974</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ANGELS &amp; DEMONS</td>\n",
+       "      <td>$1200</td>\n",
+       "      <td>Langdon in \"Angels &amp; Demons\" is looking for &lt;a...</td>\n",
+       "      <td>an antimatter bomb</td>\n",
+       "      <td>langdon in angels  demons is looking for a hre...</td>\n",
+       "      <td>an antimatter bomb</td>\n",
+       "      <td>1200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19975</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>AMERICAN HISTORY</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>In the 1600s most of New York State was occupi...</td>\n",
+       "      <td>the Iroquois</td>\n",
+       "      <td>in the 1600s most of new york state was occupi...</td>\n",
+       "      <td>the iroquois</td>\n",
+       "      <td>1600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19976</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>MIND YOUR SHAKESPEARE \"P\"s &amp; \"Q\"s</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>Marina's dad (need a hint? he rules Tyre)</td>\n",
+       "      <td>Pericles</td>\n",
+       "      <td>marinas dad need a hint he rules tyre</td>\n",
+       "      <td>pericles</td>\n",
+       "      <td>1600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19977</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ALMA MATERS</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>Presidential kids are welcome at this New Orle...</td>\n",
+       "      <td>Tulane</td>\n",
+       "      <td>presidential kids are welcome at this new orle...</td>\n",
+       "      <td>tulane</td>\n",
+       "      <td>1600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19978</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ACTRESSES</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>She didn't vamp it up &amp; did a bella job as Em ...</td>\n",
+       "      <td>Kristen Stewart</td>\n",
+       "      <td>she didnt vamp it up  did a bella job as em in...</td>\n",
+       "      <td>kristen stewart</td>\n",
+       "      <td>1600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19979</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>2-LETTER WORDS</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>Third syllable intoned by the giant who smells...</td>\n",
+       "      <td>fo</td>\n",
+       "      <td>third syllable intoned by the giant who smells...</td>\n",
+       "      <td>fo</td>\n",
+       "      <td>1600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19980</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ANGELS &amp; DEMONS</td>\n",
+       "      <td>$1600</td>\n",
+       "      <td>Much of \"Angels &amp; Demons\" takes place at one o...</td>\n",
+       "      <td>a conclave</td>\n",
+       "      <td>much of angels  demons takes place at one of a...</td>\n",
+       "      <td>a conclave</td>\n",
+       "      <td>1600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19981</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>AMERICAN HISTORY</td>\n",
+       "      <td>$1,200</td>\n",
+       "      <td>In 1899 Secretary of State John Hay proclaimed...</td>\n",
+       "      <td>open-door policy</td>\n",
+       "      <td>in 1899 secretary of state john hay proclaimed...</td>\n",
+       "      <td>opendoor policy</td>\n",
+       "      <td>1200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19982</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>MIND YOUR SHAKESPEARE \"P\"s &amp; \"Q\"s</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>Fruity surname of Peter in \"A Midsummer Night'...</td>\n",
+       "      <td>Quince</td>\n",
+       "      <td>fruity surname of peter in a midsummer nights ...</td>\n",
+       "      <td>quince</td>\n",
+       "      <td>2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19983</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ALMA MATERS</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>Quincy Jones, Kevin Eubanks &amp; Branford Marsali...</td>\n",
+       "      <td>Berklee</td>\n",
+       "      <td>quincy jones kevin eubanks  branford marsalis ...</td>\n",
+       "      <td>berklee</td>\n",
+       "      <td>2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19984</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ACTRESSES</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>In 2009 she returned to being \"Fast &amp; Furious\"...</td>\n",
+       "      <td>Michelle Rodriguez</td>\n",
+       "      <td>in 2009 she returned to being fast  furious as...</td>\n",
+       "      <td>michelle rodriguez</td>\n",
+       "      <td>2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19985</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>2-LETTER WORDS</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>The book of Genesis says this ancient city \"of...</td>\n",
+       "      <td>Ur</td>\n",
+       "      <td>the book of genesis says this ancient city of ...</td>\n",
+       "      <td>ur</td>\n",
+       "      <td>2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19986</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Double Jeopardy!</td>\n",
+       "      <td>ANGELS &amp; DEMONS</td>\n",
+       "      <td>$2000</td>\n",
+       "      <td>\"Habakkuk and the Angel\" is one of a series of...</td>\n",
+       "      <td>Bernini</td>\n",
+       "      <td>habakkuk and the angel is one of a series of a...</td>\n",
+       "      <td>bernini</td>\n",
+       "      <td>2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19987</th>\n",
+       "      <td>5694</td>\n",
+       "      <td>2009-05-14</td>\n",
+       "      <td>Final Jeopardy!</td>\n",
+       "      <td>SCIENCE TERMS</td>\n",
+       "      <td>None</td>\n",
+       "      <td>In medieval England, it meant the smallest uni...</td>\n",
+       "      <td>atom</td>\n",
+       "      <td>in medieval england it meant the smallest unit...</td>\n",
+       "      <td>atom</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19988</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>U.S. GEOGRAPHY</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>This Texas city is the largest in the U.S. to ...</td>\n",
+       "      <td>Houston (Lee Brown)</td>\n",
+       "      <td>this texas city is the largest in the us to ha...</td>\n",
+       "      <td>houston lee brown</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19989</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>POP MUSIC PAIRINGS</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>...&amp; the Crickets</td>\n",
+       "      <td>Buddy Holly</td>\n",
+       "      <td>the crickets</td>\n",
+       "      <td>buddy holly</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19990</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORIC PEOPLE</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>In the 990s this son of Erik the Red brought C...</td>\n",
+       "      <td>Leif Ericson</td>\n",
+       "      <td>in the 990s this son of erik the red brought c...</td>\n",
+       "      <td>leif ericson</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19991</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>1998 QUOTATIONS</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>Concerning a failed Windows 98 demonstration, ...</td>\n",
+       "      <td>Bill Gates</td>\n",
+       "      <td>concerning a failed windows 98 demonstration h...</td>\n",
+       "      <td>bill gates</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19992</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>LLAMA-RAMA</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>This llama product is used to make hats, blank...</td>\n",
+       "      <td>Wool</td>\n",
+       "      <td>this llama product is used to make hats blanke...</td>\n",
+       "      <td>wool</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19993</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>DING DONG</td>\n",
+       "      <td>$100</td>\n",
+       "      <td>In 1967 this company introduced its chocolate-...</td>\n",
+       "      <td>Hostess</td>\n",
+       "      <td>in 1967 this company introduced its chocolatec...</td>\n",
+       "      <td>hostess</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19994</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>U.S. GEOGRAPHY</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>Of 8, 12 or 18, the number of U.S. states that...</td>\n",
+       "      <td>18</td>\n",
+       "      <td>of 8 12 or 18 the number of us states that tou...</td>\n",
+       "      <td>18</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19995</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>POP MUSIC PAIRINGS</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>...&amp; the New Power Generation</td>\n",
+       "      <td>Prince</td>\n",
+       "      <td>the new power generation</td>\n",
+       "      <td>prince</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19996</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>HISTORIC PEOPLE</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>In 1589 he was appointed professor of mathemat...</td>\n",
+       "      <td>Galileo</td>\n",
+       "      <td>in 1589 he was appointed professor of mathemat...</td>\n",
+       "      <td>galileo</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19997</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>1998 QUOTATIONS</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>Before the grand jury she said, \"I'm really so...</td>\n",
+       "      <td>Monica Lewinsky</td>\n",
+       "      <td>before the grand jury she said im really sorry...</td>\n",
+       "      <td>monica lewinsky</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19998</th>\n",
+       "      <td>3582</td>\n",
+       "      <td>2000-03-14</td>\n",
+       "      <td>Jeopardy!</td>\n",
+       "      <td>LLAMA-RAMA</td>\n",
+       "      <td>$200</td>\n",
+       "      <td>Llamas are the heftiest South American members...</td>\n",
+       "      <td>Camels</td>\n",
+       "      <td>llamas are the heftiest south american members...</td>\n",
+       "      <td>camels</td>\n",
+       "      <td>200</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>19999 rows × 10 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Show Number   Air Date             Round  \\\n",
+       "0             4680 2004-12-31         Jeopardy!   \n",
+       "1             4680 2004-12-31         Jeopardy!   \n",
+       "2             4680 2004-12-31         Jeopardy!   \n",
+       "3             4680 2004-12-31         Jeopardy!   \n",
+       "4             4680 2004-12-31         Jeopardy!   \n",
+       "5             4680 2004-12-31         Jeopardy!   \n",
+       "6             4680 2004-12-31         Jeopardy!   \n",
+       "7             4680 2004-12-31         Jeopardy!   \n",
+       "8             4680 2004-12-31         Jeopardy!   \n",
+       "9             4680 2004-12-31         Jeopardy!   \n",
+       "10            4680 2004-12-31         Jeopardy!   \n",
+       "11            4680 2004-12-31         Jeopardy!   \n",
+       "12            4680 2004-12-31         Jeopardy!   \n",
+       "13            4680 2004-12-31         Jeopardy!   \n",
+       "14            4680 2004-12-31         Jeopardy!   \n",
+       "15            4680 2004-12-31         Jeopardy!   \n",
+       "16            4680 2004-12-31         Jeopardy!   \n",
+       "17            4680 2004-12-31         Jeopardy!   \n",
+       "18            4680 2004-12-31         Jeopardy!   \n",
+       "19            4680 2004-12-31         Jeopardy!   \n",
+       "20            4680 2004-12-31         Jeopardy!   \n",
+       "21            4680 2004-12-31         Jeopardy!   \n",
+       "22            4680 2004-12-31         Jeopardy!   \n",
+       "23            4680 2004-12-31         Jeopardy!   \n",
+       "24            4680 2004-12-31         Jeopardy!   \n",
+       "25            4680 2004-12-31         Jeopardy!   \n",
+       "26            4680 2004-12-31         Jeopardy!   \n",
+       "27            4680 2004-12-31         Jeopardy!   \n",
+       "28            4680 2004-12-31         Jeopardy!   \n",
+       "29            4680 2004-12-31  Double Jeopardy!   \n",
+       "...            ...        ...               ...   \n",
+       "19969         5694 2009-05-14  Double Jeopardy!   \n",
+       "19970         5694 2009-05-14  Double Jeopardy!   \n",
+       "19971         5694 2009-05-14  Double Jeopardy!   \n",
+       "19972         5694 2009-05-14  Double Jeopardy!   \n",
+       "19973         5694 2009-05-14  Double Jeopardy!   \n",
+       "19974         5694 2009-05-14  Double Jeopardy!   \n",
+       "19975         5694 2009-05-14  Double Jeopardy!   \n",
+       "19976         5694 2009-05-14  Double Jeopardy!   \n",
+       "19977         5694 2009-05-14  Double Jeopardy!   \n",
+       "19978         5694 2009-05-14  Double Jeopardy!   \n",
+       "19979         5694 2009-05-14  Double Jeopardy!   \n",
+       "19980         5694 2009-05-14  Double Jeopardy!   \n",
+       "19981         5694 2009-05-14  Double Jeopardy!   \n",
+       "19982         5694 2009-05-14  Double Jeopardy!   \n",
+       "19983         5694 2009-05-14  Double Jeopardy!   \n",
+       "19984         5694 2009-05-14  Double Jeopardy!   \n",
+       "19985         5694 2009-05-14  Double Jeopardy!   \n",
+       "19986         5694 2009-05-14  Double Jeopardy!   \n",
+       "19987         5694 2009-05-14   Final Jeopardy!   \n",
+       "19988         3582 2000-03-14         Jeopardy!   \n",
+       "19989         3582 2000-03-14         Jeopardy!   \n",
+       "19990         3582 2000-03-14         Jeopardy!   \n",
+       "19991         3582 2000-03-14         Jeopardy!   \n",
+       "19992         3582 2000-03-14         Jeopardy!   \n",
+       "19993         3582 2000-03-14         Jeopardy!   \n",
+       "19994         3582 2000-03-14         Jeopardy!   \n",
+       "19995         3582 2000-03-14         Jeopardy!   \n",
+       "19996         3582 2000-03-14         Jeopardy!   \n",
+       "19997         3582 2000-03-14         Jeopardy!   \n",
+       "19998         3582 2000-03-14         Jeopardy!   \n",
+       "\n",
+       "                                Category   Value  \\\n",
+       "0                                HISTORY    $200   \n",
+       "1        ESPN's TOP 10 ALL-TIME ATHLETES    $200   \n",
+       "2            EVERYBODY TALKS ABOUT IT...    $200   \n",
+       "3                       THE COMPANY LINE    $200   \n",
+       "4                    EPITAPHS & TRIBUTES    $200   \n",
+       "5                         3-LETTER WORDS    $200   \n",
+       "6                                HISTORY    $400   \n",
+       "7        ESPN's TOP 10 ALL-TIME ATHLETES    $400   \n",
+       "8            EVERYBODY TALKS ABOUT IT...    $400   \n",
+       "9                       THE COMPANY LINE    $400   \n",
+       "10                   EPITAPHS & TRIBUTES    $400   \n",
+       "11                        3-LETTER WORDS    $400   \n",
+       "12                               HISTORY    $600   \n",
+       "13       ESPN's TOP 10 ALL-TIME ATHLETES    $600   \n",
+       "14           EVERYBODY TALKS ABOUT IT...    $600   \n",
+       "15                      THE COMPANY LINE    $600   \n",
+       "16                   EPITAPHS & TRIBUTES    $600   \n",
+       "17                        3-LETTER WORDS    $600   \n",
+       "18                               HISTORY    $800   \n",
+       "19       ESPN's TOP 10 ALL-TIME ATHLETES    $800   \n",
+       "20           EVERYBODY TALKS ABOUT IT...    $800   \n",
+       "21                      THE COMPANY LINE    $800   \n",
+       "22                   EPITAPHS & TRIBUTES  $2,000   \n",
+       "23                        3-LETTER WORDS    $800   \n",
+       "24                               HISTORY   $1000   \n",
+       "25       ESPN's TOP 10 ALL-TIME ATHLETES   $1000   \n",
+       "26                      THE COMPANY LINE   $1000   \n",
+       "27                   EPITAPHS & TRIBUTES   $1000   \n",
+       "28                        3-LETTER WORDS   $1000   \n",
+       "29            DR. SEUSS AT THE MULTIPLEX    $400   \n",
+       "...                                  ...     ...   \n",
+       "19969                   AMERICAN HISTORY   $1200   \n",
+       "19970  MIND YOUR SHAKESPEARE \"P\"s & \"Q\"s   $1200   \n",
+       "19971                        ALMA MATERS  $1,500   \n",
+       "19972                          ACTRESSES   $1200   \n",
+       "19973                     2-LETTER WORDS   $1200   \n",
+       "19974                    ANGELS & DEMONS   $1200   \n",
+       "19975                   AMERICAN HISTORY   $1600   \n",
+       "19976  MIND YOUR SHAKESPEARE \"P\"s & \"Q\"s   $1600   \n",
+       "19977                        ALMA MATERS   $1600   \n",
+       "19978                          ACTRESSES   $1600   \n",
+       "19979                     2-LETTER WORDS   $1600   \n",
+       "19980                    ANGELS & DEMONS   $1600   \n",
+       "19981                   AMERICAN HISTORY  $1,200   \n",
+       "19982  MIND YOUR SHAKESPEARE \"P\"s & \"Q\"s   $2000   \n",
+       "19983                        ALMA MATERS   $2000   \n",
+       "19984                          ACTRESSES   $2000   \n",
+       "19985                     2-LETTER WORDS   $2000   \n",
+       "19986                    ANGELS & DEMONS   $2000   \n",
+       "19987                      SCIENCE TERMS    None   \n",
+       "19988                     U.S. GEOGRAPHY    $100   \n",
+       "19989                 POP MUSIC PAIRINGS    $100   \n",
+       "19990                    HISTORIC PEOPLE    $100   \n",
+       "19991                    1998 QUOTATIONS    $100   \n",
+       "19992                         LLAMA-RAMA    $100   \n",
+       "19993                          DING DONG    $100   \n",
+       "19994                     U.S. GEOGRAPHY    $200   \n",
+       "19995                 POP MUSIC PAIRINGS    $200   \n",
+       "19996                    HISTORIC PEOPLE    $200   \n",
+       "19997                    1998 QUOTATIONS    $200   \n",
+       "19998                         LLAMA-RAMA    $200   \n",
+       "\n",
+       "                                                Question  \\\n",
+       "0      For the last 8 years of his life, Galileo was ...   \n",
+       "1      No. 2: 1912 Olympian; football star at Carlisl...   \n",
+       "2      The city of Yuma in this state has a record av...   \n",
+       "3      In 1963, live on \"The Art Linkletter Show\", th...   \n",
+       "4      Signer of the Dec. of Indep., framer of the Co...   \n",
+       "5      In the title of an Aesop fable, this insect sh...   \n",
+       "6      Built in 312 B.C. to link Rome & the South of ...   \n",
+       "7      No. 8: 30 steals for the Birmingham Barons; 2,...   \n",
+       "8      In the winter of 1971-72, a record 1,122 inche...   \n",
+       "9      This housewares store was named for the packag...   \n",
+       "10                                      \"And away we go\"   \n",
+       "11     Cows regurgitate this from the first stomach t...   \n",
+       "12     In 1000 Rajaraja I of the Cholas battled to ta...   \n",
+       "13     No. 1: Lettered in hoops, football & lacrosse ...   \n",
+       "14     On June 28, 1994 the nat'l weather service beg...   \n",
+       "15     This company's Accutron watch, introduced in 1...   \n",
+       "16     Outlaw: \"Murdered by a traitor and a coward wh...   \n",
+       "17     A small demon, or a mischievous child (who mig...   \n",
+       "18     Karl led the first of these Marxist organizati...   \n",
+       "19     No. 10: FB/LB for Columbia U. in the 1920s; MV...   \n",
+       "20     Africa's lowest temperature was 11 degrees bel...   \n",
+       "21     Edward Teller & this man partnered in 1898 to ...   \n",
+       "22     1939 Oscar winner: \"...you are a credit to you...   \n",
+       "23     In geologic time one of these, shorter than an...   \n",
+       "24     This Asian political party was founded in 1885...   \n",
+       "25     No. 5: Only center to lead the NBA in assists;...   \n",
+       "26     The Kirschner brothers, Don & Bill, named this...   \n",
+       "27     Revolutionary War hero: \"His spirit is in Verm...   \n",
+       "28     A single layer of paper, or to perform one's c...   \n",
+       "29     <a href=\"http://www.j-archive.com/media/2004-1...   \n",
+       "...                                                  ...   \n",
+       "19969  In 1960 the last of these locomotives was reti...   \n",
+       "19970  Kate: \"if I be waspish, best beware my sting\";...   \n",
+       "19971  This private college in Northern California bo...   \n",
+       "19972  She voiced Princess Pea in \"The Tale of Desper...   \n",
+       "19973  It's the name of the long-awaited new White Ho...   \n",
+       "19974  Langdon in \"Angels & Demons\" is looking for <a...   \n",
+       "19975  In the 1600s most of New York State was occupi...   \n",
+       "19976          Marina's dad (need a hint? he rules Tyre)   \n",
+       "19977  Presidential kids are welcome at this New Orle...   \n",
+       "19978  She didn't vamp it up & did a bella job as Em ...   \n",
+       "19979  Third syllable intoned by the giant who smells...   \n",
+       "19980  Much of \"Angels & Demons\" takes place at one o...   \n",
+       "19981  In 1899 Secretary of State John Hay proclaimed...   \n",
+       "19982  Fruity surname of Peter in \"A Midsummer Night'...   \n",
+       "19983  Quincy Jones, Kevin Eubanks & Branford Marsali...   \n",
+       "19984  In 2009 she returned to being \"Fast & Furious\"...   \n",
+       "19985  The book of Genesis says this ancient city \"of...   \n",
+       "19986  \"Habakkuk and the Angel\" is one of a series of...   \n",
+       "19987  In medieval England, it meant the smallest uni...   \n",
+       "19988  This Texas city is the largest in the U.S. to ...   \n",
+       "19989                                  ...& the Crickets   \n",
+       "19990  In the 990s this son of Erik the Red brought C...   \n",
+       "19991  Concerning a failed Windows 98 demonstration, ...   \n",
+       "19992  This llama product is used to make hats, blank...   \n",
+       "19993  In 1967 this company introduced its chocolate-...   \n",
+       "19994  Of 8, 12 or 18, the number of U.S. states that...   \n",
+       "19995                      ...& the New Power Generation   \n",
+       "19996  In 1589 he was appointed professor of mathemat...   \n",
+       "19997  Before the grand jury she said, \"I'm really so...   \n",
+       "19998  Llamas are the heftiest South American members...   \n",
+       "\n",
+       "                                                  Answer  \\\n",
+       "0                                             Copernicus   \n",
+       "1                                             Jim Thorpe   \n",
+       "2                                                Arizona   \n",
+       "3                                             McDonald's   \n",
+       "4                                             John Adams   \n",
+       "5                                                the ant   \n",
+       "6                                         the Appian Way   \n",
+       "7                                         Michael Jordan   \n",
+       "8                                             Washington   \n",
+       "9                                         Crate & Barrel   \n",
+       "10                                        Jackie Gleason   \n",
+       "11                                               the cud   \n",
+       "12                                 Ceylon (or Sri Lanka)   \n",
+       "13                                             Jim Brown   \n",
+       "14                                          the UV index   \n",
+       "15                                                Bulova   \n",
+       "16                                           Jesse James   \n",
+       "17                                                   imp   \n",
+       "18                                     the International   \n",
+       "19                                          (Lou) Gehrig   \n",
+       "20                                               Morocco   \n",
+       "21                                         (Paul) Bonwit   \n",
+       "22     Hattie McDaniel (for her role in Gone with the...   \n",
+       "23                                                   era   \n",
+       "24                                    the Congress Party   \n",
+       "25                                    (Wilt) Chamberlain   \n",
+       "26                                                    K2   \n",
+       "27                                           Ethan Allen   \n",
+       "28                                                   ply   \n",
+       "29                                                Horton   \n",
+       "...                                                  ...   \n",
+       "19969                                      steam engines   \n",
+       "19970                                          Petruchio   \n",
+       "19971                                Stanford University   \n",
+       "19972                                        Emma Watson   \n",
+       "19973                                                 Bo   \n",
+       "19974                                 an antimatter bomb   \n",
+       "19975                                       the Iroquois   \n",
+       "19976                                           Pericles   \n",
+       "19977                                             Tulane   \n",
+       "19978                                    Kristen Stewart   \n",
+       "19979                                                 fo   \n",
+       "19980                                         a conclave   \n",
+       "19981                                   open-door policy   \n",
+       "19982                                             Quince   \n",
+       "19983                                            Berklee   \n",
+       "19984                                 Michelle Rodriguez   \n",
+       "19985                                                 Ur   \n",
+       "19986                                            Bernini   \n",
+       "19987                                               atom   \n",
+       "19988                                Houston (Lee Brown)   \n",
+       "19989                                        Buddy Holly   \n",
+       "19990                                       Leif Ericson   \n",
+       "19991                                         Bill Gates   \n",
+       "19992                                               Wool   \n",
+       "19993                                            Hostess   \n",
+       "19994                                                 18   \n",
+       "19995                                             Prince   \n",
+       "19996                                            Galileo   \n",
+       "19997                                    Monica Lewinsky   \n",
+       "19998                                             Camels   \n",
+       "\n",
+       "                                          clean_question  \\\n",
+       "0      for the last 8 years of his life galileo was u...   \n",
+       "1      no 2 1912 olympian football star at carlisle i...   \n",
+       "2      the city of yuma in this state has a record av...   \n",
+       "3      in 1963 live on the art linkletter show this c...   \n",
+       "4      signer of the dec of indep framer of the const...   \n",
+       "5      in the title of an aesop fable this insect sha...   \n",
+       "6      built in 312 bc to link rome  the south of ita...   \n",
+       "7      no 8 30 steals for the birmingham barons 2306 ...   \n",
+       "8      in the winter of 197172 a record 1122 inches o...   \n",
+       "9      this housewares store was named for the packag...   \n",
+       "10                                        and away we go   \n",
+       "11     cows regurgitate this from the first stomach t...   \n",
+       "12     in 1000 rajaraja i of the cholas battled to ta...   \n",
+       "13     no 1 lettered in hoops football  lacrosse at s...   \n",
+       "14     on june 28 1994 the natl weather service began...   \n",
+       "15     this companys accutron watch introduced in 196...   \n",
+       "16     outlaw murdered by a traitor and a coward whos...   \n",
+       "17     a small demon or a mischievous child who might...   \n",
+       "18     karl led the first of these marxist organizati...   \n",
+       "19     no 10 fblb for columbia u in the 1920s mvp for...   \n",
+       "20     africas lowest temperature was 11 degrees belo...   \n",
+       "21     edward teller  this man partnered in 1898 to s...   \n",
+       "22     1939 oscar winner you are a credit to your cra...   \n",
+       "23     in geologic time one of these shorter than an ...   \n",
+       "24     this asian political party was founded in 1885...   \n",
+       "25     no 5 only center to lead the nba in assists tr...   \n",
+       "26     the kirschner brothers don  bill named this sk...   \n",
+       "27     revolutionary war hero his spirit is in vermon...   \n",
+       "28     a single layer of paper or to perform ones cra...   \n",
+       "29     a hrefhttpwwwjarchivecommedia20041231dj23mp3be...   \n",
+       "...                                                  ...   \n",
+       "19969  in 1960 the last of these locomotives was reti...   \n",
+       "19970  kate if i be waspish best beware my sting his ...   \n",
+       "19971  this private college in northern california bo...   \n",
+       "19972  she voiced princess pea in the tale of despere...   \n",
+       "19973  its the name of the longawaited new white hous...   \n",
+       "19974  langdon in angels  demons is looking for a hre...   \n",
+       "19975  in the 1600s most of new york state was occupi...   \n",
+       "19976              marinas dad need a hint he rules tyre   \n",
+       "19977  presidential kids are welcome at this new orle...   \n",
+       "19978  she didnt vamp it up  did a bella job as em in...   \n",
+       "19979  third syllable intoned by the giant who smells...   \n",
+       "19980  much of angels  demons takes place at one of a...   \n",
+       "19981  in 1899 secretary of state john hay proclaimed...   \n",
+       "19982  fruity surname of peter in a midsummer nights ...   \n",
+       "19983  quincy jones kevin eubanks  branford marsalis ...   \n",
+       "19984  in 2009 she returned to being fast  furious as...   \n",
+       "19985  the book of genesis says this ancient city of ...   \n",
+       "19986  habakkuk and the angel is one of a series of a...   \n",
+       "19987  in medieval england it meant the smallest unit...   \n",
+       "19988  this texas city is the largest in the us to ha...   \n",
+       "19989                                       the crickets   \n",
+       "19990  in the 990s this son of erik the red brought c...   \n",
+       "19991  concerning a failed windows 98 demonstration h...   \n",
+       "19992  this llama product is used to make hats blanke...   \n",
+       "19993  in 1967 this company introduced its chocolatec...   \n",
+       "19994  of 8 12 or 18 the number of us states that tou...   \n",
+       "19995                           the new power generation   \n",
+       "19996  in 1589 he was appointed professor of mathemat...   \n",
+       "19997  before the grand jury she said im really sorry...   \n",
+       "19998  llamas are the heftiest south american members...   \n",
+       "\n",
+       "                                            clean_answer  clean_value  \n",
+       "0                                             copernicus          200  \n",
+       "1                                             jim thorpe          200  \n",
+       "2                                                arizona          200  \n",
+       "3                                              mcdonalds          200  \n",
+       "4                                             john adams          200  \n",
+       "5                                                the ant          200  \n",
+       "6                                         the appian way          400  \n",
+       "7                                         michael jordan          400  \n",
+       "8                                             washington          400  \n",
+       "9                                          crate  barrel          400  \n",
+       "10                                        jackie gleason          400  \n",
+       "11                                               the cud          400  \n",
+       "12                                   ceylon or sri lanka          600  \n",
+       "13                                             jim brown          600  \n",
+       "14                                          the uv index          600  \n",
+       "15                                                bulova          600  \n",
+       "16                                           jesse james          600  \n",
+       "17                                                   imp          600  \n",
+       "18                                     the international          800  \n",
+       "19                                            lou gehrig          800  \n",
+       "20                                               morocco          800  \n",
+       "21                                           paul bonwit          800  \n",
+       "22     hattie mcdaniel for her role in gone with the ...         2000  \n",
+       "23                                                   era          800  \n",
+       "24                                    the congress party         1000  \n",
+       "25                                      wilt chamberlain         1000  \n",
+       "26                                                    k2         1000  \n",
+       "27                                           ethan allen         1000  \n",
+       "28                                                   ply         1000  \n",
+       "29                                                horton          400  \n",
+       "...                                                  ...          ...  \n",
+       "19969                                      steam engines         1200  \n",
+       "19970                                          petruchio         1200  \n",
+       "19971                                stanford university         1500  \n",
+       "19972                                        emma watson         1200  \n",
+       "19973                                                 bo         1200  \n",
+       "19974                                 an antimatter bomb         1200  \n",
+       "19975                                       the iroquois         1600  \n",
+       "19976                                           pericles         1600  \n",
+       "19977                                             tulane         1600  \n",
+       "19978                                    kristen stewart         1600  \n",
+       "19979                                                 fo         1600  \n",
+       "19980                                         a conclave         1600  \n",
+       "19981                                    opendoor policy         1200  \n",
+       "19982                                             quince         2000  \n",
+       "19983                                            berklee         2000  \n",
+       "19984                                 michelle rodriguez         2000  \n",
+       "19985                                                 ur         2000  \n",
+       "19986                                            bernini         2000  \n",
+       "19987                                               atom            0  \n",
+       "19988                                  houston lee brown          100  \n",
+       "19989                                        buddy holly          100  \n",
+       "19990                                       leif ericson          100  \n",
+       "19991                                         bill gates          100  \n",
+       "19992                                               wool          100  \n",
+       "19993                                            hostess          100  \n",
+       "19994                                                 18          200  \n",
+       "19995                                             prince          200  \n",
+       "19996                                            galileo          200  \n",
+       "19997                                    monica lewinsky          200  \n",
+       "19998                                             camels          200  \n",
+       "\n",
+       "[19999 rows x 10 columns]"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "jeopardy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Parse the \"Air Date\" column with pandas.to_datetime and store the result back in the same column.\n",
+    "jeopardy[\"Air Date\"] = pandas.to_datetime(jeopardy[\"Air Date\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Show Number                int64\n",
+       "Air Date          datetime64[ns]\n",
+       "Round                     object\n",
+       "Category                  object\n",
+       "Value                     object\n",
+       "Question                  object\n",
+       "Answer                    object\n",
+       "clean_question            object\n",
+       "clean_answer              object\n",
+       "clean_value                int64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "jeopardy.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def count_matches(row):\n",
+    "    \"\"\"Return the fraction of answer terms that also appear in the question.\n",
+    "\n",
+    "    Args:\n",
+    "        row: Row-like object indexable by \"clean_answer\" and \"clean_question\",\n",
+    "            each holding a string.\n",
+    "\n",
+    "    Returns:\n",
+    "        0 if no terms remain after dropping \"the\"; otherwise the number of\n",
+    "        answer terms found in the question divided by the number of answer terms.\n",
+    "    \"\"\"\n",
+    "    # split() with no argument discards the empty tokens that split(\" \") yields\n",
+    "    # for repeated spaces, so \"\" can never count as a matching term.\n",
+    "    # Every \"the\" is dropped, not only the first occurrence.\n",
+    "    answer_terms = [term for term in row[\"clean_answer\"].split() if term != \"the\"]\n",
+    "    if not answer_terms:\n",
+    "        return 0\n",
+    "    # A set makes each membership test constant time on average.\n",
+    "    question_terms = set(row[\"clean_question\"].split())\n",
+    "    match_count = sum(1 for term in answer_terms if term in question_terms)\n",
+    "    return match_count / len(answer_terms)\n",
+    "\n",
+    "jeopardy[\"answer_in_question\"] = jeopardy.apply(count_matches, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.060493257069335872"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "jeopardy[\"answer_in_question\"].mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Answer terms in the question\n",
+    "\n",
+    "On average, only about `6%` of the words in an answer also appear in its question.  This isn't a huge number, and means that we probably can't just hope that hearing a question will enable us to figure out the answer.  We'll probably have to study."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.69087373156719623"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "question_overlap = []\n",
+    "terms_used = set()\n",
+    "for i, row in jeopardy.iterrows():\n",
+    "        split_question = row[\"clean_question\"].split(\" \")\n",
+    "        split_question = [q for q in split_question if len(q) > 5]\n",
+    "        match_count = 0\n",
+    "        for word in split_question:\n",
+    "            if word in terms_used:\n",
+    "                match_count += 1\n",
+    "        for word in split_question:\n",
+    "            terms_used.add(word)\n",
+    "        if len(split_question) > 0:\n",
+    "            match_count /= len(split_question)\n",
+    "        question_overlap.append(match_count)\n",
+    "jeopardy[\"question_overlap\"] = question_overlap\n",
+    "\n",
+    "jeopardy[\"question_overlap\"].mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Question overlap\n",
+    "\n",
+    "There is about `70%` overlap between terms in new questions and terms in old questions (counting only terms longer than five characters).  This only looks at a small set of questions, and it compares single terms rather than whole phrases.  That makes the result relatively insignificant on its own, but it does mean that it's worth looking more into the recycling of questions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def determine_value(row):\n",
+    "    value = 0\n",
+    "    if row[\"clean_value\"] > 800:\n",
+    "        value = 1\n",
+    "    return value\n",
+    "\n",
+    "jeopardy[\"high_value\"] = jeopardy.apply(determine_value, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[(1, 2), (0, 1), (1, 0), (0, 1), (1, 1)]"
+      ]
+     },
+     "execution_count": 84,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def count_usage(term):\n",
+    "    low_count = 0\n",
+    "    high_count = 0\n",
+    "    for i, row in jeopardy.iterrows():\n",
+    "        if term in row[\"clean_question\"].split(\" \"):\n",
+    "            if row[\"high_value\"] == 1:\n",
+    "                high_count += 1\n",
+    "            else:\n",
+    "                low_count += 1\n",
+    "    return high_count, low_count\n",
+    "\n",
+    "comparison_terms = list(terms_used)[:5]\n",
+    "observed_expected = []\n",
+    "for term in comparison_terms:\n",
+    "    observed_expected.append(count_usage(term))\n",
+    "\n",
+    "observed_expected"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[(0.031881167234403623, 0.85828871632352932),\n",
+       " (0.40196284612688399, 0.52607729857054686),\n",
+       " (2.4877921171956752, 0.11473257634454047),\n",
+       " (0.40196284612688399, 0.52607729857054686),\n",
+       " (0.44487748166127949, 0.50477764875459963)]"
+      ]
+     },
+     "execution_count": 86,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from scipy.stats import chisquare\n",
+    "import numpy as np\n",
+    "\n",
+    "high_value_count = jeopardy[jeopardy[\"high_value\"] == 1].shape[0]\n",
+    "low_value_count = jeopardy[jeopardy[\"high_value\"] == 0].shape[0]\n",
+    "\n",
+    "chi_squared = []\n",
+    "for obs in observed_expected:\n",
+    "    total = sum(obs)\n",
+    "    total_prop = total / jeopardy.shape[0]\n",
+    "    high_value_exp = total_prop * high_value_count\n",
+    "    low_value_exp = total_prop * low_value_count\n",
+    "    \n",
+    "    observed = np.array([obs[0], obs[1]])\n",
+    "    expected = np.array([high_value_exp, low_value_exp])\n",
+    "    chi_squared.append(chisquare(observed, expected))\n",
+    "\n",
+    "chi_squared"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Chi-squared results\n",
+    "\n",
+    "None of the terms had a significant difference in usage between high value and low value rows (every p-value is above `0.05`).  Additionally, the observed frequencies were all lower than `5`, so the chi-squared test is less reliable here.  It would be better to run this test with only terms that have higher frequencies."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

+ 740 - 0
Mission213Solution.ipynb

@@ -0,0 +1,740 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>instant</th>\n",
+       "      <th>dteday</th>\n",
+       "      <th>season</th>\n",
+       "      <th>yr</th>\n",
+       "      <th>mnth</th>\n",
+       "      <th>hr</th>\n",
+       "      <th>holiday</th>\n",
+       "      <th>weekday</th>\n",
+       "      <th>workingday</th>\n",
+       "      <th>weathersit</th>\n",
+       "      <th>temp</th>\n",
+       "      <th>atemp</th>\n",
+       "      <th>hum</th>\n",
+       "      <th>windspeed</th>\n",
+       "      <th>casual</th>\n",
+       "      <th>registered</th>\n",
+       "      <th>cnt</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.24</td>\n",
+       "      <td>0.2879</td>\n",
+       "      <td>0.81</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>13</td>\n",
+       "      <td>16</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.22</td>\n",
+       "      <td>0.2727</td>\n",
+       "      <td>0.80</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>32</td>\n",
+       "      <td>40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.22</td>\n",
+       "      <td>0.2727</td>\n",
+       "      <td>0.80</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>27</td>\n",
+       "      <td>32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.24</td>\n",
+       "      <td>0.2879</td>\n",
+       "      <td>0.75</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>10</td>\n",
+       "      <td>13</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.24</td>\n",
+       "      <td>0.2879</td>\n",
+       "      <td>0.75</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   instant      dteday  season  yr  mnth  hr  holiday  weekday  workingday  \\\n",
+       "0        1  2011-01-01       1   0     1   0        0        6           0   \n",
+       "1        2  2011-01-01       1   0     1   1        0        6           0   \n",
+       "2        3  2011-01-01       1   0     1   2        0        6           0   \n",
+       "3        4  2011-01-01       1   0     1   3        0        6           0   \n",
+       "4        5  2011-01-01       1   0     1   4        0        6           0   \n",
+       "\n",
+       "   weathersit  temp   atemp   hum  windspeed  casual  registered  cnt  \n",
+       "0           1  0.24  0.2879  0.81          0       3          13   16  \n",
+       "1           1  0.22  0.2727  0.80          0       8          32   40  \n",
+       "2           1  0.22  0.2727  0.80          0       5          27   32  \n",
+       "3           1  0.24  0.2879  0.75          0       3          10   13  \n",
+       "4           1  0.24  0.2879  0.75          0       0           1    1  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas\n",
+    "\n",
+    "bike_rentals = pandas.read_csv(\"bike_rental_hour.csv\")\n",
+    "bike_rentals.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([ 6972.,  3705.,  2659.,  1660.,   987.,   663.,   369.,   188.,\n",
+       "          139.,    37.]),\n",
+       " array([   1. ,   98.6,  196.2,  293.8,  391.4,  489. ,  586.6,  684.2,\n",
+       "         781.8,  879.4,  977. ]),\n",
+       " <a list of 10 Patch objects>)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": [
+       "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
+       "AAALEgAACxIB0t1+/AAAFD5JREFUeJzt3V+MnNd93vHvI1FMaFsVK7igKJGFiWJViEVT22xFN7UR\n",
+       "OnVVxkgpXUkyUIJIiNywjt0WSCz6ouJV6gRoYxmFdBH/ESXYTFmnIWhUkEUrXjRAAa+dSA0jipXY\n",
+       "lqi4CZdu40hJilak+OvFnD2cEhR3lxzujDTfDzDgec97zsx5D7nz8D3vOzupKiRJArhp3AOQJE0O\n",
+       "Q0GS1BkKkqTOUJAkdYaCJKkzFCRJ3ZKhkOSvJ3lh6PF6ks8kuT3JsSSvJHkuyfqhPvuTvJrkZJL7\n",
+       "huq3JTne9j12ow5KknRtspLPKSS5CZgH7gV+EfifVfVrST4H/OWqeiTJVuAbwN8B7gK+A8xUVSWZ\n",
+       "Az5dVXNJngG+VFXPjviYJEnXaKXLR58ATlXVa8Au4GCrPwg80Mr3A4eq6nxVnQZOAduTbARuraq5\n",
+       "1u6poT6SpAmw0lB4GDjUyhuqaqGVF4ANrXwncGaozxkGZwyX18+3eknShFh2KCRZC/wj4N9dvq8G\n",
+       "a1D+vgxJeodbs4K2PwP8XlX9sG0vJLmjqs62paFzrX4e2DzUbxODM4T5Vh6un7/8RZIYLpK0QlWV\n",
+       "UTzPSkLhU1xaOgI4CuwBfrX9eWSo/htJ/jWD5aEZYK5daH4jyXZgDtgNfOnKLzXOXFhzEd5aV1Vv\n",
+       "jnEQJDlQVQfGOYZJ4Vxc4lxc4lxcMsr/TC8rFJK8l8FF5l8Yqv4CcDjJXuA08CBAVZ1Ichg4AVwA\n",
+       "9tWlW5z2AU8C64BnvPNIkibLskKhqv4CeP9ldX/CICiu1P5XgF+5Qv3vAX9z5cOUJK0GP9E8uWbH\n",
+       "PYAJMjvuAUyQ2XEPYILMjnsA70Yr+vDaahisjXlNQZKWK0mN6kKzZwqSpM5QkCR1hoIkqTMUJEmd\n",
+       "oSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTO\n",
+       "UJAkdYaCJKkzFCRJnaEgSeqWFQpJ1if5ZpKXk5xIsj3J7UmOJXklyXNJ1g+135/k1SQnk9w3VL8t\n",
+       "yfG277EbcUCSpGu33DOFx4Bnquoe4CeAk8AjwLGquht4vm2TZCvwELAV2Ak8niTteZ4A9lbVDDCT\n",
+       "ZOfIjkSSdN2WDIUktwEfq6qvAlTVhap6HdgFHGzNDgIPtPL9wKGqOl9Vp4FTwPYkG4Fbq2qutXtq\n",
+       "qI8kaQIs50xhC/DDJF9L8vtJfiPJe4ENVbXQ2iwAG1r5TuDMUP8zwF1XqJ9v9ZKkCbFmmW0+DHy6\n",
+       "qr6f5Iu0paJFVVVJanTDOjBU3tEekiSAJDu4QW+MywmFM8CZqvp+2/4msB84m+SOqjrblobOtf3z\n",
+       "wOah/pvac8y38nD9/JVf8sAyhy9J06eqZoHZxe0kj47quZdcPqqqs8BrSe5uVZ8AXgK+BexpdXuA\n",
+       "I618FHg4ydokW4AZYK49zxvtzqUAu4f6SJImwHLOFAB+Efh6krXAfwV+DrgZOJxkL3AaeBCgqk4k\n",
+       "OQycAC4A+6pqcWlpH/AksI7B3UzPjug4JEkjkEvv15NhcG1inGNacxHeWldVb45xEJK0bEmqqrJ0\n",
+       "y6X5iWZJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5Q\n",
+       "kCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSd2yQiHJ6SR/kOSF\n",
+       "JHOt7vYkx5K8kuS5JOuH2u9P8mqSk0nuG6rfluR42/fY6A9HknQ9lnumUMCOqvpQVd3b6h4BjlXV\n",
+       "3cDzbZskW4GHgK3ATuDxJGl9ngD2VtUMMJNk54iOQ5I0AitZPspl27uAg618EHigle8HDlXV+ao6\n",
+       "DZwCtifZCNxaVXOt3VNDfSRJE2AlZwrfSfKDJL/Q6jZU1UIrLwAbWvlO4MxQ3zPAXVeon2/1kqQJ\n",
+       "sWaZ7f5eVf1xkr8CHEtycnhnVVWSGt2wDgyVd7SHJAkgyQ5u0BvjskKhqv64/fnDJL8N3AssJLmj\n",
+       "qs62paFzrfk8sHmo+yYGZwjzrTxcP3/lVzyw/COQpClTVbPA7OJ2kkdH9dxLLh8leU+SW1v5vcB9\n",
+       "wHHgKLCnNdsDHGnlo8DDSdYm2QLMAHNVdRZ4I8n2duF591AfSdIEWM6Zwgbgt9sNRGuAr1fVc0l+\n",
+       "ABxOshc4DTwIUFUnkhwGTgAXgH1Vtbi0tA94ElgHPFNVz47wWCRJ1ymX3q8nw+DaxDjHtOYivLWu\n",
+       "qt4c4yAkadmSVFVdfofoNfETzZKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTO\n",
+       "UJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJn\n",
+       "KEiSumWFQpKbk7yQ5Ftt+/Ykx5K8kuS5JOuH2u5P8mqSk0nuG6rfluR42/fY6A9FknS9lnum8Fng\n",
+       "BFBt+xHgWFXdDTzftkmyFXgI2ArsBB5PktbnCWBvVc0AM0l2juYQJEmjsmQoJNkEfBL4MrD4Br8L\n",
+       "ONjKB4EHWvl+4FBVna+q08ApYHuSjcCtVTXX2j011EeSNCGWc6bw68AvAReH6jZU1UIrLwAbWvlO\n",
+       "4MxQuzPAXVeon2/1kqQJsuZqO5P8LHCuql5IsuNKbaqqktSV9l27A0PlHe0hSQJo78c7bsRzXzUU\n",
+       "gJ8EdiX5JPDjwF9K8jSwkOSOqjrblobOtfbzwOah/psYnCHMt/Jw/fzbv+yBFRyCJE2XqpoFZhe3\n",
+       "kzw6que+6vJRVX2+qjZX1RbgYeB3qmo3cBTY05rtAY608lHg4SRrk2wBZoC5qjoLvJFke7vwvHuo\n",
+       "jyRpQix1pnC5xWWiLwCHk+wFTgMPAlTViSSHGdypdAHYV1WLffYBTwLrgGeq6tnrG7okadRy6T17\n",
+       "MgyuT4xzTGsuwlvrqurNMQ5CkpYtSVVVlm65ND/RLEnqDAVJUmcoSJI6Q0GS1BkKkqRupbekTov/\n",
+       "e+n3+I3HqO4kkKSVMBTe1jhvizUPJI2Hy0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNB\n",
+       "ktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1F01FJL8eJLvJXkxyYkk/7LV357k\n",
+       "WJJXkjyXZP1Qn/1JXk1yMsl9Q/Xbkhxv+x67cYckSbpWVw2Fqvo/wMer6oPATwAfT/JR4BHgWFXd\n",
+       "DTzftkmyFXgI2ArsBB7Ppe+1fALYW1UzwEySnTfigCRJ127J5aOq+t+tuBa4GfgRsAs42OoPAg+0\n",
+       "8v3Aoao6X1WngVPA9iQbgVuraq61e2qojyRpQiwZCkluSvIisAB8t6peAjZU1UJrsgBsaOU7gTND\n",
+       "3c8Ad12hfr7VS5ImyJqlGlTVReCDSW4Dvp3k45ftryQj/pb7A0PlHe0hSQJIsoMb9Ma4ZCgsqqrX\n",
+       "k/wHYBuwkOSOqjrblobOtWbzwOahbpsYnCHMt/Jw/fzbv9qB5Q5LkqZOVc0Cs4vbSR4d1XMvdffR\n",
+       "+xfvLEqyDvgHwAvAUWBPa7YHONLKR4GHk6xNsgWYAeaq6izwRpLt7cLz7qE+kqQJsdSZwkbgYJKb\n",
+       "GATI01X1fJIXgMNJ9gKngQcBqupEksPACeACsK+qFpeW9gFPAuuAZ6rq2VEfjCTp+uTSe/ZkGFyf\n",
+       "GOeY1lyEt24a7xhCVWXpdpI0eN8c1XuGn2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK6ZX+iWatr\n",
+       "9L86ZOW8LVaaPobCxBp3JpgH0jRy+UiS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpD\n",
+       "QZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVK3ZCgk2Zzku0leSvKHST7T6m9PcizJK0meS7J+\n",
+       "qM/+JK8mOZnkvqH6bUmOt32P3ZhDkiRdq+WcKZwH/llV/Q3gI8A/SXIP8AhwrKruBp5v2yTZCjwE\n",
+       "bAV2Ao8nWfzGlieAvVU1A8wk2TnSo5EkXZclQ6GqzlbVi63858DLwF3ALuBga3YQeKCV7wcOVdX5\n",
+       "qjoNnAK2J9kI3FpVc63dU0N9JEkTYEXXFJJ8APgQ8D1gQ1UttF0LwIZWvhM4M9TtDIMQubx+vtVL\n",
+       "kibEsr+jOcn7gN8CPltVf3ZpRQiqqkb7RfMHhso72kOSBJBkBzfojXFZoZDkFgaB8HRVHWnVC0nu\n",
+       "qKqzbWnoXKufBzYPdd/E4AxhvpWH6+ev/IoHljl8SZo+VTULzC5uJ3l0VM+9nLuPAnwFOFFVXxza\n",
+       "dRTY08p7gCND9Q8nWZtkCzADzFXVWeCNJNvbc+4e6iNJmgCpuvqqT5KPAv8R+ANgsfF+YA44DPxV\n",
+       "4DTwYFX9aevzeeDngQsMlpu+3eq3AU8C64BnquozV3i9uvQy47DmIrx103jHEMb7+oMxVFWWbidp\n",
+       "3JLUqH5elwyF1WYogKEgaSVGGQp+olmS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeqW/WsuNH1G+6tL\n",
+       "ro23xUqry1DQVYw7E8wDabW5fCRJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnq\n",
+       "DAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKlbMhSSfDXJQpLjQ3W3JzmW5JUkzyVZP7Rvf5JXk5xM\n",
+       "ct9Q/bYkx9u+x0Z/KJKk67WcM4WvATsvq3sEOFZVdwPPt22SbAUeAra2Po8nWfymlCeAvVU1A8wk\n",
+       "ufw5JUljtmQoVNXvAj+6rHoXcLCVDwIPtPL9wKGqOl9Vp4FTwPYkG4Fbq2qutXtqqI8kaUJc6zWF\n",
+       "DVW10MoLwIZWvhM4M9TuDHDXFernW70kaYJc93c0V1WN/gveDwyVd7SHptHo/22tTFX5RdGaOEl2\n",
+       "cIPeGK81FBaS3FFVZ9vS0LlWPw9sHmq3icEZwnwrD9fPv/3TH7jGYendZ5yZYB5oMlXVLDC7uJ3k\n",
+       "0VE997UuHx0F9rTyHuDIUP3DSdYm2QLMAHNVdRZ4I8n2duF591AfSdKEWPJMIckh4KeA9yd5DfgX\n",
+       "wBeAw0n2AqeBBwGq6kSSw8AJ4AKwr6oW/6u3D3gSWAc8U1XPjvZQJEnXK5fesyfDYA15nGNacxHe\n",
+       "umn8yxbj/ntxDBCvKegdIUmN6t+qn2iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK66/41F9K72bh/\n",
+       "zQb4qza0ugwF6arGnQnmgVaXy0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOj+nIE04P0Cn\n",
+       "1WQoSBNv3JlgHkwTl48kSZ2hIEnqDAVJUmcoSJI6LzRLWtK474Dy7qfVs+qhkGQn8EXgZuDLVfWr\n",
+       "qz0GSSs1zkzI2EMJpieYVnX5KMnNwL8BdgJbgU8luWc1x/DOMTvuAUyQ2XEPYILMjnsAY1JXeHz3\n",
+       "bepvxGN6rPY1hXuBU1V1uqrOA78J3L/KY3iHmB33ACbI7LgHMEFmxz2ACTI77gG8K612KNwFvDa0\n",
+       "fabVSZImwGpfU1jmedhPv35jh3E1b902vteWNKmm5bpGqlbvOJN8BDhQVTvb9n7g4vDF5kmYeEl6\n",
+       "pxlVYKx2KKwB/gvw94E/AuaAT1XVy6s2CEnS21rV5aOqupDk08C3GdyS+hUDQZImx6qeKUiSJtvE\n",
+       "/JqLJDuTnEzyapLPjXs8N1qSzUm+m+SlJH+Y5DOt/vYkx5K8kuS5JOuH+uxv83MyyX3jG/3oJbk5\n",
+       "yQtJvtW2p3IeAJKsT/LNJC8nOZFk+zTORzuul5IcT/KNJD82TfOQ5KtJFpIcH6pb8fEn2dbm8NUk\n",
+       "jy35wlU19geDpaRTwAeAW4AXgXvGPa4bfMx3AB9s5fcxuNZyD/BrwC+3+s8BX2jlrW1ebmnzdAq4\n",
+       "adzHMcL5+OfA14GjbXsq56Ed40Hg51t5DXDbtM1HO5b/BvxY2/63wJ5pmgfgY8CHgONDdSs5/sWV\n",
+       "oDng3lZ+Bth5tdedlDOFqftQW1WdraoXW/nPgZcZfGZjF4M3BdqfD7Ty/cChqjpfVacZ/KXfu6qD\n",
+       "vkGSbAI+CXyZS9/oMnXzAJDkNuBjVfVVGFyHq6rXmb75eAM4D7yn3aDyHgY3p0zNPFTV7wI/uqx6\n",
+       "Jce/PclG4Naqmmvtnhrqc0WTEgpT/aG2JB9g8D+C7wEbqmqh7VoANrTynQzmZdG7aY5+Hfgl4OJQ\n",
+       "3TTOA8AW4IdJvpbk95P8RpL3MmXzUVV/Avwr4H8wCIM/rapjTNk8XMFKj//y+nmWmJdJCYWpvdqd\n",
+       "5H3AbwGfrao/G95Xg/O9q83NO37ekvwscK6qXuBtvvdxGuZhyBrgw8DjVfVh4C+AR4YbTMN8JPlr\n",
+       "wD9lsBRyJ/C+JP94uM00zMPVLOP4r8mkhMI8sHloezP/f7q9KyW5hUEgPF1VR1r1QpI72v6NwLlW\n",
+       "f/kcbWp173Q/CexK8t+BQ8BPJ3ma6ZuHRWeAM1X1/bb9TQYhcXbK5uNvA/+pqv5XVV0A/j3wd5m+\n",
+       "ebjcSn4uzrT6TZfVX3VeJiUUfgDMJPlAkrXAQ8DRMY/phkoS4CvAiar64tCuowwuqNH+PDJU/3CS\n",
+       "tUm2ADMMLiC9o1XV56tqc1VtAR4GfqeqdjNl87Coqs4CryW5u1V9AngJ+BbTNR8ngY8kWdd+Vj4B\n",
+       "nGD65uFyK/q5aP+e3mh3sAXYPdTnysZ9hX3oqvrPMLgD5xSwf9zjWYXj/SiDNfQXgRfaYydwO/Ad\n",
+       "4BXgOWD9UJ/Pt/k5CfzDcR/DDZiTn+LS3UfTPA9/C/g+8J8Z/A/5tmmcD+CXGQTicQYXVW+Zpnlg\n",
+       "cOb8R8CbDK65/ty1HD+wrc3hKeBLS72uH16TJHWTsnwkSZoAhoIkqTMUJEmdoSBJ6gwFSVJnKEiS\n",
+       "OkNBktQZCpKk7v8BIgy2anPl5soAAAAASUVORK5CYII=\n"
+      ],
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x10790ef28>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "%matplotlib inline\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.hist(bike_rentals[\"cnt\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "instant       0.278379\n",
+       "season        0.178056\n",
+       "yr            0.250495\n",
+       "mnth          0.120638\n",
+       "hr            0.394071\n",
+       "holiday      -0.030927\n",
+       "weekday       0.026900\n",
+       "workingday    0.030284\n",
+       "weathersit   -0.142426\n",
+       "temp          0.404772\n",
+       "atemp         0.400929\n",
+       "hum          -0.322911\n",
+       "windspeed     0.093234\n",
+       "casual        0.694564\n",
+       "registered    0.972151\n",
+       "cnt           1.000000\n",
+       "Name: cnt, dtype: float64"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "bike_rentals.corr()[\"cnt\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def assign_label(hour):\n",
+    "    if hour >=0 and hour < 6:\n",
+    "        return 4\n",
+    "    elif hour >=6 and hour < 12:\n",
+    "        return 1\n",
+    "    elif hour >= 12 and hour < 18:\n",
+    "        return 2\n",
+    "    elif hour >= 18 and hour <=24:\n",
+    "        return 3\n",
+    "\n",
+    "bike_rentals[\"time_label\"] = bike_rentals[\"hr\"].apply(assign_label)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Error metric\n",
+    "\n",
+    "The mean squared error metric makes the most sense to evaluate our error.  MSE works on continuous numeric data, which fits our data quite well."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "train = bike_rentals.sample(frac=.8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "test = bike_rentals.loc[~bike_rentals.index.isin(train.index)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, normalize=False)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "\n",
+    "predictors = list(train.columns)\n",
+    "predictors.remove(\"cnt\")\n",
+    "predictors.remove(\"casual\")\n",
+    "predictors.remove(\"registered\")\n",
+    "predictors.remove(\"dteday\")\n",
+    "\n",
+    "reg = LinearRegression()\n",
+    "\n",
+    "reg.fit(train[predictors], train[\"cnt\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "16586.154698429491"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import numpy\n",
+    "predictions = reg.predict(test[predictors])\n",
+    "\n",
+    "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ -75.31906346,  144.15652539,  125.29713548, ...,  167.94469909,\n",
+       "        181.44415684,  165.3047817 ])"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4          1\n",
+       "10        36\n",
+       "16        93\n",
+       "24        17\n",
+       "36        75\n",
+       "39        76\n",
+       "40        65\n",
+       "45         9\n",
+       "48         2\n",
+       "52        64\n",
+       "68        12\n",
+       "72         2\n",
+       "76       179\n",
+       "80        78\n",
+       "81        97\n",
+       "87       112\n",
+       "88        54\n",
+       "90        35\n",
+       "92         6\n",
+       "109      169\n",
+       "111       89\n",
+       "112       43\n",
+       "113       42\n",
+       "115       11\n",
+       "122      219\n",
+       "133      112\n",
+       "138       17\n",
+       "144       84\n",
+       "146      134\n",
+       "147       63\n",
+       "        ... \n",
+       "17232     34\n",
+       "17243     31\n",
+       "17245      8\n",
+       "17255     32\n",
+       "17265     45\n",
+       "17269     75\n",
+       "17280     63\n",
+       "17289     51\n",
+       "17291    239\n",
+       "17292    191\n",
+       "17298    225\n",
+       "17301    213\n",
+       "17302    128\n",
+       "17304     92\n",
+       "17309     19\n",
+       "17311      3\n",
+       "17312      3\n",
+       "17315     44\n",
+       "17316     49\n",
+       "17327     66\n",
+       "17339     33\n",
+       "17340     74\n",
+       "17343    144\n",
+       "17346    138\n",
+       "17348    123\n",
+       "17349    125\n",
+       "17351     72\n",
+       "17353     36\n",
+       "17354     49\n",
+       "17373    122\n",
+       "Name: cnt, dtype: int64"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test[\"cnt\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Error\n",
+    "\n",
+    "The error is very high, which may be due to the fact that the data has a few extremely high rental counts, but otherwise mostly low counts.  Larger errors are penalized more with MSE, which leads to a higher total error."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DecisionTreeRegressor(compute_importances=None, criterion='mse',\n",
+       "           max_depth=None, max_features=None, max_leaf_nodes=None,\n",
+       "           min_density=None, min_samples_leaf=5, min_samples_split=2,\n",
+       "           random_state=None, splitter='best')"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.tree import DecisionTreeRegressor\n",
+    "\n",
+    "reg = DecisionTreeRegressor(min_samples_leaf=5)\n",
+    "\n",
+    "reg.fit(train[predictors], train[\"cnt\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2644.2820429330714"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions = reg.predict(test[predictors])\n",
+    "\n",
+    "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2964.7288070579207"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "reg = DecisionTreeRegressor(min_samples_leaf=2)\n",
+    "\n",
+    "reg.fit(train[predictors], train[\"cnt\"])\n",
+    "\n",
+    "predictions = reg.predict(test[predictors])\n",
+    "\n",
+    "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Decision tree error\n",
+    "\n",
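+    "By taking nonlinear relationships between the predictors and the target into account, the decision tree regressor appears to have a much lower error than linear regression (an MSE of roughly 2,600 to 3,000, compared with roughly 16,600)."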
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestRegressor(bootstrap=True, compute_importances=None,\n",
+       "           criterion='mse', max_depth=None, max_features='auto',\n",
+       "           max_leaf_nodes=None, min_density=None, min_samples_leaf=5,\n",
+       "           min_samples_split=2, n_estimators=10, n_jobs=1, oob_score=False,\n",
+       "           random_state=None, verbose=0)"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.ensemble import RandomForestRegressor\n",
+    "\n",
+    "reg = RandomForestRegressor(min_samples_leaf=5)\n",
+    "reg.fit(train[predictors], train[\"cnt\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1911.9827104170736"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions = reg.predict(test[predictors])\n",
+    "\n",
+    "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Random forest error\n",
+    "\n",
+    "By removing some of the sources of overfitting, the random forest achieves a lower error than the decision tree (an MSE of roughly 1,900, compared with roughly 2,600 for the tree with the same `min_samples_leaf`)."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

+ 799 - 0
Misson211Solution.ipynb

@@ -0,0 +1,799 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>type</th>\n",
+       "      <th>name</th>\n",
+       "      <th>yearpublished</th>\n",
+       "      <th>minplayers</th>\n",
+       "      <th>maxplayers</th>\n",
+       "      <th>playingtime</th>\n",
+       "      <th>minplaytime</th>\n",
+       "      <th>maxplaytime</th>\n",
+       "      <th>minage</th>\n",
+       "      <th>users_rated</th>\n",
+       "      <th>average_rating</th>\n",
+       "      <th>bayes_average_rating</th>\n",
+       "      <th>total_owners</th>\n",
+       "      <th>total_traders</th>\n",
+       "      <th>total_wanters</th>\n",
+       "      <th>total_wishers</th>\n",
+       "      <th>total_comments</th>\n",
+       "      <th>total_weights</th>\n",
+       "      <th>average_weight</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>12333</td>\n",
+       "      <td>boardgame</td>\n",
+       "      <td>Twilight Struggle</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>180</td>\n",
+       "      <td>180</td>\n",
+       "      <td>180</td>\n",
+       "      <td>13</td>\n",
+       "      <td>20113</td>\n",
+       "      <td>8.33774</td>\n",
+       "      <td>8.22186</td>\n",
+       "      <td>26647</td>\n",
+       "      <td>372</td>\n",
+       "      <td>1219</td>\n",
+       "      <td>5865</td>\n",
+       "      <td>5347</td>\n",
+       "      <td>2562</td>\n",
+       "      <td>3.4785</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>120677</td>\n",
+       "      <td>boardgame</td>\n",
+       "      <td>Terra Mystica</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "      <td>150</td>\n",
+       "      <td>60</td>\n",
+       "      <td>150</td>\n",
+       "      <td>12</td>\n",
+       "      <td>14383</td>\n",
+       "      <td>8.28798</td>\n",
+       "      <td>8.14232</td>\n",
+       "      <td>16519</td>\n",
+       "      <td>132</td>\n",
+       "      <td>1586</td>\n",
+       "      <td>6277</td>\n",
+       "      <td>2526</td>\n",
+       "      <td>1423</td>\n",
+       "      <td>3.8939</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>102794</td>\n",
+       "      <td>boardgame</td>\n",
+       "      <td>Caverna: The Cave Farmers</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>210</td>\n",
+       "      <td>30</td>\n",
+       "      <td>210</td>\n",
+       "      <td>12</td>\n",
+       "      <td>9262</td>\n",
+       "      <td>8.28994</td>\n",
+       "      <td>8.06886</td>\n",
+       "      <td>12230</td>\n",
+       "      <td>99</td>\n",
+       "      <td>1476</td>\n",
+       "      <td>5600</td>\n",
+       "      <td>1700</td>\n",
+       "      <td>777</td>\n",
+       "      <td>3.7761</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>25613</td>\n",
+       "      <td>boardgame</td>\n",
+       "      <td>Through the Ages: A Story of Civilization</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>240</td>\n",
+       "      <td>240</td>\n",
+       "      <td>240</td>\n",
+       "      <td>12</td>\n",
+       "      <td>13294</td>\n",
+       "      <td>8.20407</td>\n",
+       "      <td>8.05804</td>\n",
+       "      <td>14343</td>\n",
+       "      <td>362</td>\n",
+       "      <td>1084</td>\n",
+       "      <td>5075</td>\n",
+       "      <td>3378</td>\n",
+       "      <td>1642</td>\n",
+       "      <td>4.1590</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3076</td>\n",
+       "      <td>boardgame</td>\n",
+       "      <td>Puerto Rico</td>\n",
+       "      <td>2002</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "      <td>150</td>\n",
+       "      <td>90</td>\n",
+       "      <td>150</td>\n",
+       "      <td>12</td>\n",
+       "      <td>39883</td>\n",
+       "      <td>8.14261</td>\n",
+       "      <td>8.04524</td>\n",
+       "      <td>44362</td>\n",
+       "      <td>795</td>\n",
+       "      <td>861</td>\n",
+       "      <td>5414</td>\n",
+       "      <td>9173</td>\n",
+       "      <td>5213</td>\n",
+       "      <td>3.2943</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       id       type                                       name  \\\n",
+       "0   12333  boardgame                          Twilight Struggle   \n",
+       "1  120677  boardgame                              Terra Mystica   \n",
+       "2  102794  boardgame                  Caverna: The Cave Farmers   \n",
+       "3   25613  boardgame  Through the Ages: A Story of Civilization   \n",
+       "4    3076  boardgame                                Puerto Rico   \n",
+       "\n",
+       "   yearpublished  minplayers  maxplayers  playingtime  minplaytime  \\\n",
+       "0           2005           2           2          180          180   \n",
+       "1           2012           2           5          150           60   \n",
+       "2           2013           1           7          210           30   \n",
+       "3           2006           2           4          240          240   \n",
+       "4           2002           2           5          150           90   \n",
+       "\n",
+       "   maxplaytime  minage  users_rated  average_rating  bayes_average_rating  \\\n",
+       "0          180      13        20113         8.33774               8.22186   \n",
+       "1          150      12        14383         8.28798               8.14232   \n",
+       "2          210      12         9262         8.28994               8.06886   \n",
+       "3          240      12        13294         8.20407               8.05804   \n",
+       "4          150      12        39883         8.14261               8.04524   \n",
+       "\n",
+       "   total_owners  total_traders  total_wanters  total_wishers  total_comments  \\\n",
+       "0         26647            372           1219           5865            5347   \n",
+       "1         16519            132           1586           6277            2526   \n",
+       "2         12230             99           1476           5600            1700   \n",
+       "3         14343            362           1084           5075            3378   \n",
+       "4         44362            795            861           5414            9173   \n",
+       "\n",
+       "   total_weights  average_weight  \n",
+       "0           2562          3.4785  \n",
+       "1           1423          3.8939  \n",
+       "2            777          3.7761  \n",
+       "3           1642          4.1590  \n",
+       "4           5213          3.2943  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas\n",
+    "\n",
+    "board_games = pandas.read_csv(\"board_games.csv\")\n",
+    "board_games = board_games.dropna(axis=0)\n",
+    "board_games = board_games[board_games[\"users_rated\"] > 0]\n",
+    "\n",
+    "board_games.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([   602.,   1231.,   2824.,   5206.,   8223.,  13593.,  13849.,\n",
+       "          8470.,   2224.,    672.]),\n",
+       " array([  1. ,   1.9,   2.8,   3.7,   4.6,   5.5,   6.4,   7.3,   8.2,\n",
+       "          9.1,  10. ]),\n",
+       " <a list of 10 Patch objects>)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": [
+       "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEACAYAAACznAEdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
+       "AAALEgAACxIB0t1+/AAAFK9JREFUeJzt3X+MXedd5/H3h7jOOtuQyCCcH3YbbzVZ1VVBjRHOsotq\n",
+       "2hK8XWTnjypxBcEQi38MNKBVIQ4S8V8VFYLUC3IkID/sLPHWm1TBlbxpvIHRVuq2LjSkbhyvnRVe\n",
+       "PBN5UpI2XlYCbOW7f9xn8O10amfuvXPv+Pr9kkZ+znOf55znyDPnc89zzrk3VYUkSd836gFIkpYG\n",
+       "A0GSBBgIkqTGQJAkAQaCJKkxECRJwCUCIcmjSWaSHJ3ntf+Y5K0kK7vqdiY5meR4kju66tcnOdpe\n",
+       "291Vf3WSz7b6Lyd596B2TJK0MJc6Q3gM2DS3Mska4KeA/9NVtw64G1jX+uxJkvbyw8D2qpoAJpLM\n",
+       "rnM78Hqrfwj4dB/7Iknqw0UDoaq+CHxrnpd+H/iNOXVbgP1Vda6qTgGvABuS3AhcW1VHWrt9wJ2t\n",
+       "vBnY28pPAx9e8B5IkgZiwdcQkmwBpqrq63NeugmY6lqeAm6ep3661dP+PQ1QVeeBN7unoCRJw7Ns\n",
+       "IY2TXAM8QGe66J+rBzoiSdJILCgQgPcAtwAvtssDq4G/SrKBzjv/NV1tV9M5M5hu5bn1tNfeBbya\n",
+       "ZBlwXVW9MXejSfzAJUnqQVW97TftCwqEqjoKrJpdTvI3wPqqeiPJQeDJJL9PZypoAjhSVZXkbAuN\n",
+       "I8A9wH9qqzgIbAO+DHwMeH4QO3W5SbKrqnaNehyLxf27fI3zvsEVsX8LejN90UBIsh/4IPADSU4D\n",
+       "v11Vj3U1+eeNVdWxJAeAY8B5YEdd+CjVHcDjwArgUFU92+ofAZ5IchJ4Hdi6kMFLl5thne2O8xso\n",
+       "LZ6LBkJVffwSr/+rOcufAj41T7u/At4/T/0/Ane9rZFKY2OxM8EsUG98UnlpmBz1ABbZ5KgHsMgm\n",
+       "Rz2ARTQ56gEssslRD2ApyeXwBTlJylNgjYPOlNHinyH49yJY+LFzoXcZSWPLu9l0pTMQpO/g/L6u\n",
+       "XF5DkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJ\n",
+       "EmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJwCUCIcmjSWaSHO2q+90kLyd5McnnklzX9drOJCeT\n",
+       "HE9yR1f9+iRH22u7u+qvTvLZVv/lJO8e9A5Kkt6eS50hPAZsmlP3HPC+qvoR4ASwEyDJOuBuYF3r\n",
+       "syfJ7DeKPwxsr6oJYCLJ7Dq3A6+3+oeAT/e5P5KkHl00EKrqi8C35tQdrqq32uJXgNWtvAXYX1Xn\n",
+       "quoU8AqwIcmNwLVVdaS12wfc2cqbgb2t/DTw4T72RZLUh36vIdwLHGrlm4CprtemgJvnqZ9u9bR/\n",
+       "TwNU1XngzSQr+xyTJKkHy3rtmOS3gH+qqicHOJ6LbW9X1+JkVU0OY7uSdLlIshHY2Gv/ngIhyS8A\n",
+       "H+U7p3imgTVdy6vpnBlMc2Faqbt+ts+7gFeTLAOuq6o35ttmVe3qZaySdKVob5QnZ5eTPLiQ/gue\n",
+       "MmoXhD8JbKmqf+h66SCwNcnyJGuBCeBIVZ0BzibZ0C4y3wP8WVefba38MeD5hY5HkjQYFz1DSLIf\n",
+       "+CDwg0lOAw/SuatoOXC43UT0P6tqR1UdS3IAOAacB3ZUVbVV7QAeB1YAh6rq2Vb/CPBEkpPA68DW\n",
+       "Qe6cJOnty4Vj9tKVpKoql24p9S5JwWL/PYRhbMO/F8HCj50+qSxJAgwESVJjIEiSAANBktQYCJIk\n",
+       "wECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElS\n",
+       "YyBIkgADQZLUGAiSJMBAkCQ1BoIkCbhEICR5NMlMkqNddSuTHE5yIslzSa7vem1nkpNJjie5o6t+\n",
+       "fZKj7bXdXfVXJ/lsq/9ykncPegclSW/Ppc4QHgM2zam7HzhcVbcCz7dlkqwD7gbWtT57kqT1eRjY\n",
+       "XlUTwESS2XVuB15v9Q8Bn+5zfyRJPbpoIFTVF4FvzaneDOxt5b3Ana28BdhfVeeq6hTwCrAhyY3A\n",
+       "tVV1pLXb19Wne11PAx/ucT8kSX3q5RrCqqqaaeUZYFUr3wRMdbWbAm6ep3661dP+PQ1QVeeBN5Os\n",
+       "7GFMkqQ+Leunc1VVkhrUYC4mya6uxcmqmhzGdiXpcpFkI7Cx1/69BMJMkhuq6kybDnqt1U8Da7ra\n",
+       "raZzZjDdynPrZ/u8C3g1yTLguqp6Y76NVtWuHsYqSVeM9kZ5cnY5yYML6d/LlNFBYFsrbwOe6arf\n",
+       "mmR5krXABHCkqs4AZ5NsaBeZ7wH+bJ51fYzORWpJ0gik6nvP+CTZD3wQ+EE61wt+m87B/ACdd/an\n",
+       "gLuq6tut/QPAvcB54L6q+kKrXw88DqwADlXVJ1r91cATwAeA14Gt7YL03HFUVWVuvTRInenPxZ4B\n",
+       "DcPYhn8vgoUfOy8aCEuFgaBhMBA0bhZ67PRJZUkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmA\n",
+       "gSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqRm\n",
+       "2agHIGnwktRib2MhX96uy4OBII2lxc4Ds2AcOWUkSQL6CIQkO5O8lORokieTXJ1kZZLDSU4keS7J\n",
+       "9XPan0xyPMkdXfXr2zpOJtnd7w5JknrTUyAkuQX4JeC2qno/cBWwFbgfOFxVtwLPt2WSrAPuBtYB\n",
+       "m4A9SWbPOR8GtlfVBDCRZFPPe6OxlaQW+2fU+yiNWq9nCGeBc8A1SZYB1wCvApuBva3NXuDOVt4C\n",
+       "7K+qc1V1CngF2JDkRuDaqjrS2u3r6iPNUYv8I13ZegqEqnoD+D3gb+kEwber6jCwqqpmWrMZYFUr\n",
+       "3wRMda1iCrh5nvrpVi9JGrKe7jJK8h7g14BbgDeB/5rk57rbVNVAT8OT7OpanKyqyUGtW5LGQZKN\n",
+       "wMZe+/d62+mPAl+qqtfbID4H/BvgTJIbqupMmw56rbWfBtZ09V9N58xgupW766fn22BV7epxrJJ0\n",
+       "RWhvlCdnl5M8uJD+vV5DOA7cnmRFuzj8EeAY8HlgW2uzDXimlQ8CW5MsT7IWmACOVNUZ4GySDW09\n",
+       "93T1kSQNUU9nCFX1YpJ9wF8CbwFfA/4IuBY4kGQ7cAq4q7U/luQAndA4D+yoqtnppB3A48AK4FBV\n",
+       "Pdvz3kiSepYLx+WlK0n5mPyVrXM9ahhP37qNt7sN/yaXvoUeO31SWZIEGAiSpMZAkCQBBoIkqTEQ\n",
+       "JEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgI\n",
+       "kqTGQJAkAQaCJKkxECRJgIEgSWoMBEkS0EcgJLk+yVNJXk5yLMmGJCuTHE5yIslzSa7var8zyckk\n",
+       "x5Pc0VW/PsnR9trufndIktSbfs4QdgOHquq9wA8Dx4H7gcNVdSvwfFsmyTrgbmAdsAnYkyRtPQ8D\n",
+       "26tqAphIsqmPMUmSetRTICS5DviJqnoUoKrOV9WbwGZgb2u2F7izlbcA+6vqXFWdAl4BNiS5Ebi2\n",
+       "qo60dvu6+kiShqjXM4S1wDeTPJbka0n+OMm/BFZV1UxrMwOsauWbgKmu/lPAzfPUT7d6SdKQLeuj\n",
+       "323Ar1TVV5N8hjY9NKuqKkn1O8BZSXZ1LU5W1eSg1i1J4yDJRmBjr/17DYQpYKqqvtqWnwJ2AmeS\n",
+       "3FBVZ9p00Gvt9WlgTVf/1W0d063cXT893waralePY5WkK0J7ozw5u5zkwYX072nKqKrOAKeT3Nqq\n",
+       "PgK8BHwe2NbqtgHPtPJBYGuS5UnWAhPAkbaes+0OpQD3dPWRJA1Rr2cIAL8K/GmS5cD/Bn4RuAo4\n",
+       "kGQ7cAq4C6CqjiU5ABwDzgM7qmp2OmkH8Diwgs5dS8/2MSZJUo9y4bi8dCWpqsqlW2pcda5HLfbv\n",
+       "anAbb38b/k0ufQs9dvqksiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNf08qSwxyA8w\n",
+       "lDRaBoIGYBiZ4EOx0mJzykiSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiS\n",
+       "pMZAkCQBBoIkqTEQJElAn4GQ5KokLyT5fFtemeRwkhNJnktyfVfbnUlOJjme5I6u+vVJjrbXdvcz\n",
+       "HklS7/o9Q7gPOMaFzz++HzhcVbcCz7dlkqwD7gbWAZuAPUlmP8/4YWB7VU0AE0k29TkmSVIPeg6E\n",
+       "JKuBjwJ/woUPq98M7G3lvcCdrbwF2F9V56rqFPAKsCHJjcC1VXWktdvX1UeSNET9nCE8BHwSeKur\n",
+       "blVVzbTyDLCqlW8CprraTQE3z1M/3eolSUPW0zemJfkZ4LWqeiHJxvnaVFUN8usVk+zqWpysqslB\n",
+       "rVuSxkE7Hm/stX+vX6H548DmJB8F/gXw/UmeAGaS3FBVZ9p00Gut/TSwpqv/ajpnBtOt3F0/Pd8G\n",
+       "q2pXj2OVpCtCe6M8Obuc5MGF9O9pyqiqHqiqNVW1FtgK/HlV3QMcBLa1ZtuAZ1r5ILA1yfIka4EJ\n",
+       "4EhVnQHOJtnQLjLf09VHkjREvZ4hzDU7NfQ7wIEk24FTwF0AVXUsyQE6dySdB3ZU1WyfHcDjwArg\n",
+       "UFU9O6AxSZIWIBeOy0tXkqqqXLqlhq1znWgYv0Nh8bfjNhayDf8ml76FHjt9UlmSBBgIkqTGQJAk\n",
+       "AQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagb1fQhaggb5\n",
+       "FaaSxp+BMPaG8dn7ksaBU0aSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSgB4DIcmaJH+R5KUk\n",
+       "30jyiVa/MsnhJCeSPJfk+q4+O5OcTHI8yR1d9euTHG2v7e5/lyQNQ5Ja7J9R7+OVptczhHPAr1fV\n",
+       "+4DbgV9O8l7gfuBwVd0KPN+WSbIOuBtYB2wC9iSZfaLpYWB7VU0AE0k29bw3koaoFvlHw9ZTIFTV\n",
+       "mar661b+e+Bl4GZgM7C3NdsL3NnKW4D9VXWuqk4BrwAbktwIXFtVR1q7fV19JElD1Pc1hCS3AB8A\n",
+       "vgKsqqqZ9tIMsKqVbwKmurpN0QmQufXTrV6SNGR9fZZRkncCTwP3VdX/vTALBFU10DnAJLu6Fier\n",
+       "anJQ65akcZBkI7Cx1/49B0KSd9AJgyeq6plWPZPkhqo606aDXmv108Caru6r6ZwZTLdyd/30fNur\n",
+       "ql29jlWSrgTtjfLk7HKSBxfSv9e7jAI8Ahyrqs90vXQQ2NbK24Bnuuq3JlmeZC0wARypqjPA2SQb\n",
+       "2jrv6eojSRqiVC18VifJvwP+B/B1LtwOsBM4AhwA3gWcAu6qqm+3Pg8A9wLn6UwxfaHVrwceB1YA\n",
+       "h6rqE/Nsr6rKz1leoM6U3TA+/noYd4QMYztuY6ltw7/7/iz02NlTIAybgdAbA8FtXO7b8O++Pws9\n",
+       "dvqksiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiSgz88yUu/8rHdJS42BMFLDeHhIkt4ep4wk\n",
+       "SYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAT6H8F2SvBt4/6jHIWk4D3D6JTwXGAjf7afhh3bD\n",
+       "un9cvE383TvgG4u3emls+PDmMBkI8/r3b8Hj1y3e+g8B/2HxVi9JPfAagiQJMBAkSY2BIEkClkgg\n",
+       "JNmU5HiSk0l+c9TjkaQr0cgDIclVwB8Cm4B1wMeTvHe0oxq2yVEPYJFNjnoAi2xy1ANYRJOjHoCG\n",
+       "aOSBAPwY8EpVnaqqc8B/AbaMeExDNjnqASyyyVEPYJFNjnoAi2hy1APQEC2FQLgZON21PNXqJElD\n",
+       "tBSeQ1iCXyX5374PPvTm4q3/m+8Arlm89UvSwqVqtMfjJLcDu6pqU1veCbxVVZ/uarMEQ0OSlr6F\n",
+       "fDTHUgiEZcD/Aj4MvAocAT5eVS+PdGCSdIUZ+ZRRVZ1P8ivAF4CrgEcMA0kavpGfIUiSloalcJfR\n",
+       "RY3zQ2tJ1iT5iyQvJflGkk+MekyDluSqJC8k+fyoxzJoSa5P8lSSl5Mca9fDxkaSne1382iSJ5Nc\n",
+       "Peox9SPJo0lmkhztqluZ5HCSE0meS3L9KMfYj++xf7/bfj9fTPK5JBf90M4lHQhXwENr54Bfr6r3\n",
+       "AbcDvzxm+wdwH3CMJXk3Wd92A4eq6r3ADwNjM9WZ5Bbgl4Dbqur9dKZzt45yTAPwGJ1jSbf7gcNV\n",
+       "dSvwfFu+XM23f88B76uqHwFOADsvtoIlHQiM+UNrVXWmqv66lf+ezgHlptGOanCSrAY+CvwJY/bB\n",
+       "8+2d1k9U1aPQuRZWVYt4q/LQnaXzhuWaduPHNcD0aIfUn6r6IvCtOdWbgb2tvBe4c6iDGqD59q+q\n",
+       "DlfVW23xK8Dqi61jqQfCFfPQWntH9gE6/2nj4iHgk8Bbl2p4GVoLfDPJY0m+luSPk4zNsyVV9Qbw\n",
+       "e8Df0rn779tV9d9HO6pFsaqqZlp5Blg1ysEssnvpfBnL97TUA2Ecpxm+S5J3Ak8B97Uzhctekp8B\n",
+       "XquqFxizs4NmGXAbsKeqbgP+H5f3dMN3SPIe4NeAW+ictb4zyc+OdFCLrDp32IzlMSfJbwH/VFVP\n",
+       "XqzdUg+EaWBN1/IaOmcJYyPJO4Cngf9cVc+MejwD9OPA5iR/A+wHPpRk34jHNEhTwFRVfbUtP0Un\n",
+       "IMbFjwJfqqrXq+o88Dk6/6fjZibJDQBJbgReG/F4Bi7JL9CZur1koC/1QPhLYCLJLUmWA3cDB0c8\n",
+       "poFJEuAR4FhVfWbU4xmkqnqgqtZU1Vo6FyP/vKp+ftTjGpSqOgOcTnJrq/oI8NIIhzRox4Hbk6xo\n",
+       "v6cfoXNzwLg5CGxr5W3AOL0pI8kmOtO2W6rqHy7VfkkHQntnMvvQ2jHgs2P20Nq/BX4O+Ml2a+YL\n",
+       "7T9wHI3jqfivAn+a5EU6dxl9asTjGZiqehHYR+dN2ddb9R+NbkT9S7If+BLwr5OcTvKLwO8AP5Xk\n",
+       "BPChtnxZmmf/7gX+AHgncLgdX/ZcdB0+mCZJgiV+hiBJGh4DQZIEGAiSpMZAkCQBBoIkqTEQJEmA\n",
+       "gSBJagwESRIA/x9ipXt3iOuOXAAAAABJRU5ErkJggg==\n"
+      ],
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x1039faf98>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.hist(board_games[\"average_rating\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.57882993483\n",
+      "6.01611284933\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(board_games[\"average_rating\"].std())\n",
+    "print(board_games[\"average_rating\"].mean())\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Error metric\n",
+    "\n",
+    "In this data set, using mean squared error as an error metric makes sense.  This is because the data is continuous and follows a roughly normal distribution.  We'll be able to compare our error to the standard deviation to see how good the model is at predictions (since mean squared error is in squared units, it is its square root that is directly comparable to the standard deviation)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=5, n_init=10,\n",
+       "    n_jobs=1, precompute_distances=True, random_state=None, tol=0.0001,\n",
+       "    verbose=0)"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.cluster import KMeans\n",
+    "\n",
+    "clus = KMeans(n_clusters=5)\n",
+    "cols = list(board_games.columns)\n",
+    "cols.remove(\"name\")\n",
+    "cols.remove(\"id\")\n",
+    "cols.remove(\"type\")\n",
+    "numeric = board_games[cols]\n",
+    "\n",
+    "clus.fit(numeric)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import numpy\n",
+    "game_mean = numeric.apply(numpy.mean, axis=1)\n",
+    "game_std = numeric.apply(numpy.std, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<matplotlib.collections.PathCollection at 0x10b5516d8>"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": [
+       "iVBORw0KGgoAAAANSUhEUgAAAZAAAAEACAYAAACd2SCPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
+       "AAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYFNXZ9/Hvr9fZh1X2VVHBuCAqGhPBDXE3RiPmia9G\n",
+       "4hISXJJo1CyaXc0TjVn0TdTEJXHXGFREccFEI2IUFUEiRFEBAWUbZuvp5X7/qEI6vGigZ+ke5v5c\n",
+       "V19Unarqvs/MUHfXOVXnyMxwzjnntlWk2AE455zrnDyBOOecK4gnEOeccwXxBOKcc64gnkCcc84V\n",
+       "xBOIc865gmxVApE0SNLTkuZLel3SeWH5FZKWSpobvo7MO+ZSSYskLZQ0Ia98jKR54bbr8sqTku4O\n",
+       "y2dLGtKWFXXOOde2tvYKJA1caGa7AfsDX5M0EjDgGjMbHb4eBZA0CjgFGAVMBK6XpPC9bgAmm9kI\n",
+       "YISkiWH5ZGB1WH4tcFUb1M8551w72aoEYmYrzOyVcLkeeAMYEG7WFg45HrjTzNJmtgRYDIyV1A+o\n",
+       "NrM54X63ASeEy8cBt4bL9wOHbmNdnHPOdaBt7gORNBQYDcwOi6ZKelXSzZK6hWX9gaV5hy0lSDib\n",
+       "ly9jUyIaALwHYGYZYL2kHtsan3POuY6xTQlEUhVwH3B+eCVyAzAM2At4H/hFm0fonHOuJMW2dkdJ\n",
+       "cYKmpT+Z2YMAZrYqb/tNwEPh6jJgUN7hAwmuPJaFy5uXbzxmMLBcUgyoNbM1m8XgA3c551wBzGxL\n",
+       "3Q2tftP/+iLo57gNuHaz8n55yxcCd4TLo4BXgATBFcq/AYXbXgDGhu85HZgYlk8BbgiXJwF3bSEO\n",
+       "25p4S/UFXFHsGDz+4sfRFePvzLFvJ/Fbe7zv1l6BHAh8CXhN0tyw7DLgVEl7EdyN9TZwThjpAkn3\n",
+       "AAuADDDFwlqEieIWoByYbmYzwvKbgdslLQJWh0nEOedcidqqBGJmz7Ll/pJHP+GYnwI/3UL5S8Du\n",
+       "WyhPAV/Ymnicc84Vnz+J3rFmFTuAVppV7ABaaVaxA2ilWcUOoBVmFTuAVppV7ABKkTa1LJU+SWbt\n",
+       "0RHknHPbsfY6d/oViHPOuYJ4AnHOOVcQTyDOOecK4gnEOedcQTyBOOecK4gnEOeccwXxBOKcc+1M\n",
+       "Um9J4yTtVOxY2pInEOeca0eSDo/B0h3gqQS8GZfuLHZMbcUfJHTOuXYiKRqFhtMgORSoB34LNMG5\n",
+       "Zva7DozDHyR0zrlOppsgMTRcqSKY5yIaDE7b6XkCcc659rNWkFu4cYVg2lXBqk84ptPwJiznnGtH\n",
+       "kr4Rh19UAg2AQSoD+5jZ6x0YQ7ucOz2BOOdcO5M0MR7MhbQuDVeZ2fwO/nxPIJ5AnHNu23knunPO\n",
+       "uZLiCcQ551xBPIE455wriCcQ55xzBfEE4pxzriCeQJxzzhXEE4hzzrmCeAJxzjlXEE8gzjnnCuIJ\n",
+       "xDnnXEE8gTjnnCuIJxDnXKcj6eyy2rKWRGUiV9atbNX2NlVsZ7FVCUTSIElPS5ov6XVJ54XlPSTN\n",
+       "lPSmpMcldcs75lJJiyQtlDQhr3yMpHnhtuvyypOS7g7LZ0sa0pYVdc5tHyTtH6uI/e6Ym4+Kf3Xh\n",
+       "Odpt0qjeyZrEy8WOqyva2iuQNHChme0G7A98TdJI4BJgppntDDwZriNpFHAKMAqYCFwvaeNIkDcA\n",
+       "k81sBDBC0sSwfDKwOiy/Friq1bVzzm2P/s/ww4bZqJNGUjuohiN/ewTpxky1pO7FDqyr2aoEYmYr\n",
+       "zOyVcLkeeAMYABwH3BruditwQrh8PHCnmaXNbAmwGBgrqR9QbWZzwv1uyzsm/73uBw4ttFLOue3a\n",
+       "mnVL1stywVQUG5Zt2FheX7SIuqjYth4gaSgwGngB6GNmK8NNK4E+4XJ/YHbeYUsJEk46XN5oWVhO\n",
+       "+O97AGaWkbReUg8zW7OtMTrntms/Wff2ugv/dNgdFYM+O4i5v59LJKZHsmlLFzuwrmabEoikKoKr\n",
+       "g/PNbMOmVikwM5PUeWancs6VvLBZagTwqpmlAMysSVLfJU+/87slz7wziBwzzOwnxY20a9rqBCIp\n",
+       "TpA8bjezB8PilZL6mtmKsHlq40Txy4BBeYcPJLjyWBYub16+8ZjBwHJJMaB2S1cfkq7IW51lZrO2\n",
+       "tg7Ouc4jEo/8LpqInh0rj5FL50zSqWZ2N4CZbQC+WOQQS5ak8cD4dv+crZnSNuwAv5Wgk/vCvPKr\n",
+       "w7KrJF0CdDOzS8JO9DuA/Qiapp4AdgqvUl4AzgPmAI8AvzKzGZKmALub2VclTQJOMLNJm8XhU9o6\n",
+       "1wVIOjJRnZh+5gtn0HtkL+bdMZ9HzpqeSzemY9aZ5uEuEcWe0vZA4EvAwZLmhq+JwJXA4ZLeBA4J\n",
+       "1zGzBcA9wALgUWBK3i99CnATsAhYbGYzwvKbgZ6SFgEXEN7R5Zzrko4Y9JlB9B7ZC4Ddv7gblrMI\n",
+       "MKy4Ybl8W3UFUir8CsS5rkHSmZU7VNw85V/nUtatjOX/fJ9bDryNbEs2Yead5duqvc6dnkCccyUp\n",
+       "WZN8JZqI7tl7t172/j9XKN2c/qll7TvFjqsz8gSCJxDnupqwP3QUMMPM/lHseDorTyB4AnHOuUIU\n",
+       "uxPdOeec+w+eQJxzzhXEE4hzzrmCeAJxzjlXEE8gzjnnCuIJxDnnXEE8gTjnnCuIJxDnnHMF8QTi\n",
+       "nHOuIJ5AnHPOFcQTiHPOuYJ4AnHOOVcQTyDOOecK4gnEOedcQTyBOOecK4gnEOfcfyWpu6TyYsfh\n",
+       "SosnEOfcx5LUr6w2uToSi6yJxCKNicr485J8UjcHeAJxzn2CZE3yuR0n7tjjkoaLuGDZVKr6V+8P\n",
+       "/LrYcbnS4AnEOffxxOADL/s00USUyh0q2ffrY0hUxQ8rdliuNHgCcc59PNG07PllAJgZ7z37Humm\n",
+       "zLtFjsqVCJlZsWPYau01MbxzDiRFgV8CuwMvAd8CPhcrj90/ZPxgGlY1suZfa1pa6lsGm9nKogbr\n",
+       "tkl7nTs9gTjnkKREdWJJ7eCawbucsDML7l1I/fv1C1N1qZGSdgfOBuqBK81sfZHDddvIEwieQJxr\n",
+       "L5KOLute9vAFS6cSr4iTqktxbf9fkW5IH2Bms4sdn2ud9jp3eh+Icw6gb0XvCotXxAFI1iQp61YG\n",
+       "0L+oUbmS5gnEOQfw8IalG3jxt/9kw/v1/OPnz9O8rjkLzCx2YK50eROWcw4ASUcma5P3Z1uy5dFE\n",
+       "tDG1PnWMmT1d7Lhc6xW1CUvSHyStlDQvr+wKSUslzQ1fR+Ztu1TSIkkLJU3IKx8jaV647bq88qSk\n",
+       "u8Py2ZKGtFUFnXNbx8webV7XXJFuTKt5XXOlJw/332xtE9YfgYmblRlwjZmNDl+PAkgaBZwCjAqP\n",
+       "uT5v6IMbgMlmNgIYIWnje04GVofl1wJXFVwj55xzHWKrEoiZ/R1Yu4VNW7okOh6408zSZrYEWAyM\n",
+       "ldQPqDazOeF+twEnhMvHAbeGy/cDh25d+M4554qltZ3oUyW9KulmSd3Csv7A0rx9lgIDtlC+LCwn\n",
+       "/Pc9ADPLAOsl9WhlbM4559pRrBXH3gD8MFz+EfALgqaodiXpirzVWWY2q70/0znnOhNJ44Hx7f05\n",
+       "BScQM1u1cVnSTcBD4eoyYFDergMJrjyWhcubl288ZjCwXFIMqDWzNR/zuVcUGrNzznUF4RfrWRvX\n",
+       "JV3eHp9TcBNW2Kex0eeAjXdoTQMmSUpIGgaMAOaY2QqgTtLYsFP9NOCvececHi6fBDxZaFzOOec6\n",
+       "xlZdgUi6ExgH9JL0HnA5MF7SXgR3Y70NnANgZgsk3QMsADLAFNv0sMkU4BagHJhuZjPC8puB2yUt\n",
+       "AlYDk9qgbs4559qRP0jonHPbOR8Lyzm3zcK5zJPFjsNtnzyBOLcdkjQoWZtcq6jWRGKR5nhF/Jli\n",
+       "x+S2P55AnNsOJWuTz+187IhulzZdzHnvfp3KHSoPkvS/xY7LbV88gTi3HTKzAZ/5zoFE41Gq+1Wx\n",
+       "79fHECuPTfjvRzq39TyBONfJKXC0pNMldQdQRM3LZm+ay/zdv79HptnnMndty+/Ccq4TkxRPVCfe\n",
+       "jSaifZO1SRpWNmTTDenxwMBYeezOoYcMoX55PWsWr021bGgZZGYfFDtm1/F8Sls8gTi3OUm399+v\n",
+       "35dOf+Y0YmUx/v6T53j+f2evb17b3E3SaOArQB0+l3mX5rfxOtfFSNq9rDb5TkXPisZkTXKepN6b\n",
+       "7xMrj+0x8qSRxMqCZ4JHnbwrlrVqADOba2ZfM7NLPXm49uAJxLkSI+nksu5lq8q6l702YOyAwSfe\n",
+       "fXz5jhOGfSpRnViUN7cOAJmmzMvz71pAujENwOt3zEdRebJwHcKbsJwrIZImxMpjjx3ys/HUDqll\n",
+       "1vf+xh7/Z3f2v3A/ru7+C9L16b3M7NW8/aOJ6sQSRTQwWZ2gaV1zNl2f/nTevDvOtdu5szXDuTvn\n",
+       "2pq4bMw5oxl7/n4AdBvajftOeoB9vz4GyxpAOn93M8tKGgwclFqf6gXMMLOGDo/bdUmeQJwrLZbf\n",
+       "KmA5I5PKcOdRdxOJRd43swUfc4A/ae46nDdhOVdCJB0SK489Oe4HB1E7pIYnL36ahg8bWixrf882\n",
+       "Z481s6Zix+g6H7+NF08grmuQdEKyNvkbRVWRqks9aBmbbJ3pP6orOZ5A8ATinHOF8OdAnHPOlRRP\n",
+       "IM455wriCcS5dhYOdvgTSdMkXVTseJxrK55AnGtHkpSoTizuvlP3y/b52phjq/pVXZ2oSjwr6eRk\n",
+       "TTIdiUUsWZNskXRisWN1blt5J7pzbUzSDxDjMd4CZpb3LP/zee98jURlgoZVDVw35Dcg7OgbjtSo\n",
+       "k0fy+l0LeGzq45ZuTPc2s9XFjt9tf7wT3blOIF4Z/1vNwOrvf/a7Bx404IABZySq4jdVD6iyRGUC\n",
+       "gModKknWJKnsXak9T9+DeEWc0WfuSWWfCgFHFDd657aNJxDn2oiknrl07rOTX/wy4384jjP+dhoV\n",
+       "vSvKP1y4WvPvWUBLQwtzfv0i6YZ0rmlNE83rmgFoWttEw6pGgLeKWgHntpEPZeJc2+kdjUep7FMJ\n",
+       "QCQWoXZwLeveXj/t4a9MPyrdmI4lKuOpdEP6uERV/Oe/3+umPXY6aicWPbwYSXPNbHaR43dum3gf\n",
+       "iHNtJOwwbxpzzujkfhfsxzvPvMvDZz1CpjHzKTObv/m+wHeBMcAcM/tpUYJ2XYI/iY4nEFf6JO2e\n",
+       "rE3+LduS7RZNRFtS61NfMbPbix2X69o8geAJxLUvSROA4cAjZvZeseNxrq34fCDOtZOw6Wlesia5\n",
+       "W/WAalu3ZB2STvcrB+c+mV+BuC5P0ndqh9T8+OxXv0JZbRnz717Aw2dNz6TqUvFix+ZcWyjqcyCS\n",
+       "/iBppaR5eWU9JM2U9KakxyV1y9t2qaRFkhaGzQIby8dImhduuy6vPCnp7rB8tqQhbVVB57bC6OET\n",
+       "hlNWWwbALifsTEt9S2zz+cedc/9pa58D+SMwcbOyS4CZZrYz8GS4jqRRwCnAqPCY6/P+I94ATDaz\n",
+       "EcAISRvfczKwOiy/FriqwPo4V4jZix5eTOOHjQDM+/N8ElWJtM/B4dwn2+omLElDgYfMbPdwfSEw\n",
+       "zsxWSuoLzDKzXSVdCuTM7KpwvxnAFcA7wFNmNjIsnwSMN7Nzw30uN7MXJMWA982s9xZi8CYs1y4S\n",
+       "VYl/mNkBFT0raFrdZOnG9Ilm9uDG7ZKOjlfGr4nEImWp9anf+W23rjMpxU70Pma2MlxeCfQJl/sD\n",
+       "+Q9ELQUGAOlweaNlYTnhv+8BmFlG0npJPcxsTSvic26rtdS3fFrS6LrGumHA02a2duM2SRNj5bGH\n",
+       "P/3tA6joVc7T333mJ4qop+Xsm0UM2bmia5O7sMzMJHXI5b6kK/JWZ5nZrI74XLf9M7O5wNzNy2Pl\n",
+       "sZ+NvXA/DvreZwDoNqwbf/niX78GeAJxJUnSeGB8e39OaxLISkl9zWyFpH7AqrB8GTAob7+BBFce\n",
+       "y8Llzcs3HjMYWB42YdV+3NWHmV3Ripid22aKKJGsSX60nqxOgI8j50pY+MV61sZ1SZe3x+e05j/B\n",
+       "NOD0cPl04MG88kmSEpKGASMIhmpYAdRJGht2qp8G/HUL73USQae8c+1G0gmKaKakhyTt/kn7phvS\n",
+       "v3z2x88y/+4FvP3k2/z19IdIN6Yf7ahYnStVW9WJLulOYBzQi6C/4/sEJ/97CK4clgBfMLN14f6X\n",
+       "AWcCGeB8M3ssLB8D3AKUA9PN7LywPAncDowGVgOTzGzJFuLwTnS3zSTtlaxNfkdRVaXWNd9kOfrG\n",
+       "K+K/2fvc0TStbmLBvW9YpjGzr5m99Anv8e2yHmXfFcRa6tMPZ1uyX/C7tFxn4UOZ4AnEbTtJu8XK\n",
+       "Yy8c/KNxFRU7VOiJi55qTNWnYsfeeHTiU6fuBsBj5z/OS7+b+1I2lX2krEfZFHJkmtc1f9/Mbixy\n",
+       "+M61iVK8C8u5khcri5316Yv2r9j/m2MFUD2guuK+k+6n27CPnnulx4geADuW9Sj7/oRrDiPdkOaJ\n",
+       "i576vaSsmf2hSKE7V/I8gbjtm4hG4pGPvnlF4xEUifD4hTM54U/H07ymib/96FniFfHKY28+ml1P\n",
+       "2AWATCrLM1f87XuAJxDnPoYnELddyzRlHnjuZ89/vapvFRW9K5j5zSfJZXP0368/f9z/FjKpLOmm\n",
+       "9O1lNcnPR6KRj8a+ikSF5HdaOfdJvA/EbdcknVLZt/KugfsPINOUYdQpI3nxty9x5G+OoKW+hQcm\n",
+       "/aWp8cOmCkX064reFV8/8rdHkK5PM2PqY7TUp6eY2Q3FroNzreV9IM4VZnGqLsUxNx5FRa8KmtY2\n",
+       "8eTFT2FmPH3ZLDJNmRcALGdTFVFm+rkzzrScZVvq0z/y5OHcJ/MrELfdS1Ql/pGsSR6w48ThvPX4\n",
+       "WzStbcayRiQWeT3dkN7bzNLFjtG59uS38eIJxBVO0oXAAcCLZvbzYsfjXEfyBIInELdlCv8wih2H\n",
+       "c6WqqBNKOVdKJJ0u6TEpskSxMkPRnMpqV0ka8N+Pds61FU8grlORoleRrLmFA74xgR0nDKH7cDh/\n",
+       "EfTdqzexsgXhwJ7OuQ7gTViuU1GyOsuXHo0wJBhanTtPgKUvQI8dIdMCHyzIkW7YJxya3TmHN2E5\n",
+       "F8hlIvTYadN69x2h21CY/CycMwf2/WqEZM3LiibWSPp80eJ0rgvwBOJKkqS4pAcVia5XJLZS0pkA\n",
+       "RBPLmT4VNqyAd/4OL98Eux6/6cDhh0LvkdBjx+7Eyu6TdG6RquDcds+bsFzJkSTilatQpBd7nAqV\n",
+       "feH5a6Flw/eB35OseZlsS38i8RwtDQ0MPqCa0x4DReHeU2CHTwWJZPp5sGH5BmtaU1PsOjlXTP4k\n",
+       "uutKjgf1YvQZcNSvgpIB+8D9p11Oav0AUnXfMLO7IZxLZsVri7iyxyAUhV2OgfHfhzf+AokqkOKf\n",
+       "8DnOuVbwJixXUiRVARORIJF34VA9AKQoww45h2TNXYqV3wtgZilL1Q0m2/I/mEG3YfCPa2D6VKhf\n",
+       "AenGp4tUFee2e96E5YpKUm+gP/A6UEOi6iX67rUDZbWVvP10cAUycH944HRY+2+o7A31KyHdBLn0\n",
+       "YDN7L++9DiOSuItovCdmObBnyTQdYmbZolXQuRLgTVhuu6N4xWNE4hOIJQHSZFJ3MPLEAZx4awKA\n",
+       "OdfDjG+BZcFycPJdQRPVh2/C78ZAS/pU4OqN72dmTxBMu+yc6wDehOU6jKTu4VPkkyVdTlnNBC5c\n",
+       "ApfVwYEXx4mV/w8D9098dMCA/ZBsAy0bziISC5IHQK+doe9eAL2LUQ/nXMATiOsQkvoRr1jAThNv\n",
+       "YJfjfkes/HJ2PAJq+oMEY6dCuiHG89ek2PA+pJtI/P1ydumZqqhK8m3STcEDgwAb3oeV8wCeK2ad\n",
+       "nOvqvAnLdYxE5Q/Y+6xeHHlt8Df37NXBrbmZFoglYMnTUDMI1i9Ncs1gojKO2CPBnV9pjo6+kj6L\n",
+       "Pkjfwh8PPoNeuxhrFgvLPWlmDxa5Vs51aZ5AXMeIJIYzYJ9Nf2/99gYE1+0YPE3+wQL44l/hka9b\n",
+       "9ZqXsyt+Rqwi0YQZZHMAdiOZpmtY8cphwCtm5ndXOVdknkBcx0g3jeDvV8LwwyCahGd+CJU7QCYF\n",
+       "zWvhgAuDpFK3TFlT+sQbLXbWp2HaPFIf1PMu8E8zawHmFbsqzrmA38brOoTKe65j+CG1LHokuKNq\n",
+       "2KHw/iuw77kQr4B5d0K6Eereg52PIZatp2L532huaqxrackMNrP1xa6Dc52V38brOrdsagnRxJ5c\n",
+       "ugEwePAMSK2DV28DRWD9u8FQJIkqOPp6MhXdqWtpgCt71kCmO+AJxLkS41cgrk1IOhQ0DmwF8Ecg\n",
+       "A5wM9AVeAvoQr7ybsm4goLwXnPkMLLgfHr0Aeo0MkkftQGhYBac9Ctk0/KwW0k0jzWxhEavnXKfm\n",
+       "U9riCaTUSAqe2YgmziZZexX7nlPO0heaeW/2ImADvXbZi/77JCOv3hKrjKVJZ4zmTAQGjoUvPw3R\n",
+       "cJiqK3tB7eCgI/2yDfCTSvifh+Clm+Dfj68nVdfdp6x1rnA+H4grGZLiStbcDpFG4pUp4Nd85bkK\n",
+       "DvmROO2xcnrtsjPl3cZw1uzKeHlVbJc+4sZJWS44BMpjWVizKBiKBOCDhZBrgc//CSIxaPwAMLj3\n",
+       "1BSLH3uDVN0ITx7OlaZWJxBJSyS9JmmupDlhWQ9JMyW9KelxSd3y9r9U0iJJCyVNyCsfI2leuO26\n",
+       "1sbl2lG84jv02eNEeuwY5eAfgBnUDgm2SdB9uKjsIxRBc37DzHObOGUM/OzYHIePFMTK4Prd4Y7j\n",
+       "4I/j4KjfBMdG43DTARCJzbKmNWWWqhtlZh8Ur6LOuU/SFlcgBow3s9Fmtl9Ydgkw08x2Bp4M15E0\n",
+       "CjgFGAVMBK6XtPGy6gZgspmNAEZImtgGsbn2ECufwOgzKsg2w4HfhJ0mwCNfg/VLYeE0ePPhCKte\n",
+       "j/L2LMxyxKN5h0Ys6OOoXwmLZsCunws60f98NKSb6lj/7lWWbjq4eJVzzm2ttroLa/O2teOAceHy\n",
+       "rcAsgiRyPHCnmaWBJZIWA2MlvQNUm9mc8JjbgBOAGW0Un2tLufQ7rJq/H83romxYASf+CaadDb/e\n",
+       "BeKVEInFSdVv4N5TqiOW5cjr4cfHwCvLYPrCOFTsAJW9YNCBMPfmZhbcu4HmumlY5ixvrnKu82ir\n",
+       "K5AnJP1T0llhWR8zWxkurwT6hMv9gaV5xy4FBmyhfFlY7kqIpKiksaTq5vPS76NU9g1GxX3iEljx\n",
+       "SvA8x8Cx8NVXIRKtJhonNXUJrw29mEl39+THj8doziZg2Hj48F+w8AFIN15tjat3sFz6K548nOtc\n",
+       "2uIK5EAzez+c12GmpP+43dLMTFKbnRgkXZG3OsvMZrXVe7uApGEETYyNwANmtkHR+FeIJn6P5USs\n",
+       "LOi3iCXh3eeCeclrB8OB34b5d8Ovdw0eFmxck+GXw2OZZA113YbAGX+FWw+BPrvDgvugcfXjZnZ5\n",
+       "kavr3HZH0nhgfLt/Tlt+6ZN0OVAPnEXQL7JCUj/gaTPbVdIlAGZ2Zbj/DOBy4J1wn5Fh+anAODM7\n",
+       "d7P399t425mk/crjPHXiXkRW1pGbvYQP6lOcQbJ2Jmc9H6fXrjDrB/DWEzD52eCgG/eHlgbovw/U\n",
+       "DIB1S4L+jWQNNK2BPnvCPmcFxyy4H4hAS/2NZtmzi1lX57qKkryNV1KFpOpwuRKYQDBW0TTg9HC3\n",
+       "04GNo6ZOAyZJSoTfckcAc8xsBVAnaWzYqX5a3jGuA3Uv54bfn0rln06nfOZUKk/Yg/4R8S12OS5C\n",
+       "75HBXVYHfQfemw25LKx9m+Sa+VTXvR7cmjv/XvjUJNj1OBh6UHCH1oblMPuX8OYjkElDS91lnjyc\n",
+       "6/xa24TVB/hLeCNVDPizmT0u6Z/APZImA0uALwCY2QJJ9wALCJ5UnpLX7j0FuAUoB6abmXegF0EO\n",
+       "dtgjr/dp70Ek7nklenTL8jn6aOj1ZXMgmiD6212JNyzj58elSKfh0ofuI/WZ78POR8NzVweDJmZT\n",
+       "77N2cT0bygaTTa/GsuPMbHHxauicayv+JLr7iKQelQlmHLQT+971ZVhVD+N+CcvrkxBNBPOR994N\n",
+       "3n4Kchkun5DinM9Av1r45gPwq2fLyIyZAstfgjVvBrdXpBs3WPO6mmLXzbmuzIcywRNIe5LUi3jF\n",
+       "2ww/vKp83eukP/g3kXiCbKSMrJKQ2gAHXgSzr4PRZ8ILv6YqmeXbh8PKOrjpedHcewysfA12PiZ4\n",
+       "JSrh4SkN1vhhVbHr51xX5qPxunamJxh1UhUn3koTwIIH4Lmfw7BDYOFfg6fHawfBwZfD2rchWUn9\n",
+       "bl/gB4/dSjYHNnxCMH7V9XtC75HQfSj85XRoXn9fkSvmnGsnnkC6IEk9iSbuJ14xnJb6l4nEUiQq\n",
+       "92SH3Tbt1HNnaFoL788NhlpHwRXFqtfhgzeCfRZOI7PfN4KyIZ8NOthrBsLz12aZc32Wlvr7scyX\n",
+       "i1JJ51y78yasLkZSkkT1GoYfUsHOx8DfroSGFXD8TTDzYjjlAajqC385A1a/GcwWGE2CZWHk5+H1\n",
+       "O6GqP+x6PLxyC/TYKUgwX5oeJJKHzoZ00z5m9lKx6+qcC3gTlms1SeXAVcTKK8ikg3k40o3QcwTs\n",
+       "PgkyzXDPycFYVaZgutlIBCp6Q/fh8Nqfgnk8Jv89ePajaXWKZavvIVF9OH/4bB8UTZFuOtuTh3Nd\n",
+       "gyeQLkJSTxJVL9Bt2EAiUXj3GTjpThiwbzCG1b9nwugzgnnK7/kCTLo3mHL2qe8FQ5REolDdDzas\n",
+       "gGsGBwMgwmFm9myRq+acKxJPINuR8CHMqQSjHT9iZg99tDFe8Xs+NWk4x/1eSPDoBUQfOZvsF6fD\n",
+       "pL8Eo+GioB/j4B/CiCPh9XuCY9e9A+vehlglRMsgXQ+5lus9eTjXtfmEUtsJSSJRvYiagdex6/Hn\n",
+       "kKiepkjst5IiipX9kFjZiex0RJA8AHY6gsHVjZT/+eBwZkBBrBxyOahbGlyFLHwwuCLBIJsNkktL\n",
+       "HVimv1nua8Wsr3Ou+LwTfTsh6atU97+e894MEsGDX4FX/wwR1dNzxyoGjoV17wa32ipC2X0nMGXg\n",
+       "UzyzyHh88arSAAAOqklEQVRpWRzKe0L3YcFzHJlU2EQV/qgtByfdAQ+cBi0brjOzC4paWefcNvFO\n",
+       "dPff7EyfPSDTAjceCKveA86E2B1VfOHeoKP8L2fAlT2JyBg3Ulw+oYVd5wA5QfO6YIj1IQcFt+K+\n",
+       "ejukmyGSgN4jgmc6crm/efJwzm3kTVidiKSK2nI9kIgqVZ5QXSKmr+Ztfoi3noafD4BVbwMbgPug\n",
+       "JQ1vPRk0U530Z/jUKcRyzezdt4mDrhPrmqMQKQuaqnY7Cd5/Gd59FnJpsPSjZBvGs+LVH5Jaf5yl\n",
+       "68d9TGjOuS7Im7A6kdpy3Xbwzpx8y5coW7YeDrmOxlX1nGhmjwXzzsc+hDOiwej4rwFHAk3Ag3DQ\n",
+       "tyASg+f+N3jor+9e0Lg6mIKWXDAcO7lg/KqqPrB+2Xyz9KeKWF3nXBvxJiyHGUdcdTxl3SqgWwVc\n",
+       "cDAVV0xnAvAY8GmoiMJA4GHgGGBQeORqmP1biMaCvo3DfhbM1XH35yG1LmimMoPqvtBcB40fZiBz\n",
+       "TLHq6ZzrHLwJqxOJRlg9//1N668sJdWSZSdJ0yD+ILQA9xD8WpvzjmyCljg0rYNMBm7+DFy/O6za\n",
+       "ANmDIZ2GeDnUr4TmNdeSbuhvZks6tHLOuU7Hm7A6EUnjKhJMP3UMkXfWkHv235Q3ZxIK2p12A/YC\n",
+       "XgVeJ0giBwMNwGyCfboBY9i93xMs/jBHU/osYDXBFUsmBdlKM8sWo27Oufbjw7njCQRA0i7ArwQT\n",
+       "IoKsRYEyIA0cBBxIWfx/SUQbqGseCHxA0FJZAaQpj9fy0+PeIR6BS6b1pD61AWi5z8xOLladnHPt\n",
+       "qySntHUdK3zS/LflcSacPx6+OxEqE1ngaIIH0F8kmAAyS3MaYBVRZYDDCO7KamS/IcuZ8lkY0gNi\n",
+       "kdU5aJniycM5Vwi/AukEJA0EjoDo8Ylo5NiLDkvz42ODbXe8COfeFWFD6mLgMcS7SHWINGMGiX2G\n",
+       "GNf/PXgosE91lmnnQDIGJ90M767h3lTGvlDEqjnnOoDfhdVFSboAdE0wVXyzYpFhDOz25kfb+9WC\n",
+       "FAHmA29jGGY5Lj5U/PAYY++rNv7NZKlrhsN/E6ylMsxqyXJKB1fHObcd8SuQEqUgK9wL8RMhAdQC\n",
+       "q4DP0LPyOe6bnKa6DL54C/z7wx5kcxsIvg+kiUUyjB4I762D9U2iKW0A3zOzHxetQs65ovFOdLpO\n",
+       "Agnn7VgPkXjQ+Z0huEW3D7AW2I/qsn8CjTS29CCbqwMqgUa6l6dZ2wTBOFYRIFsP1Fhn+kU759qU\n",
+       "JxC6RgKRtCOwCGKCLMFVRZaasihm0JTOksnF2fSsR4zgFl1jYG2WlRsgnQOgDtjXzN7c8ic557oK\n",
+       "vwurC5B0KUQWQzx8tiMKfJaasgh3n5nm6fPT7NonRyzSQvCrEwmlqUhkqC3Lsq7po+RxiJnVevJw\n",
+       "zrUnTyDtRFIsGtHU2nLdFovqIkmJ/7L/NOCnkAtL4sAAEtFn+O7EDBNHwZjB8IcvQUXCgBRgDO8N\n",
+       "jS2wvhnqWwC42syebs+6Oecc+F1Y7ULS8Kh4qjzBwAOGEm1oofm15Rwl6VAzy4X7bJxwIxoTzVGh\n",
+       "RAzSWcjk0gSJZBTZXCXvr1/w0XuvrAv+jZIhHoMlaz7alAammtnvOqyizrkuzRNIG5N0QEWc5/Yf\n",
+       "hrI5ePYtyBll8QifBvaSNLcywU+j8I1scHsVOYOKJOyyAwzsDjPfgMb0YcAzZO04bnh2AZkc9KuB\n",
+       "nz0OqTTEYtCcAeANMxtVxCo757oo70RvY93KtfS88Qy44GC4/BF4/A14dx3UJGBNEy9ncoyuTKCI\n",
+       "YEMqOKYyARVxWP6TIDHcNxe+/KdK6lPDw3d9g4gyRBQlk8tRHjea0sEGTx7Ouf/GO9E7iViUbnsO\n",
+       "gNFXwoo6GLEDVMaDPopMjr3L4+iCg+Gnx0H38uCYk/eEHpVw/v3B+j6DIWcZ4D1gAZAlZ5VkcgmE\n",
+       "aE6XAzFPHs65oiqpBCJpoqSFkhZJ+nax4ylEU5pXL3sI+lQHzVc77wAXHALxWHClMXUc/PhY+Po4\n",
+       "uPPL0K0cbnkRJo2B598O+kC+/wgEz37UA1mEEYyq25Q1bK7RdDVkditmPZ1zrmT6QCRFgd8QjPy3\n",
+       "DHhR0jQze6O4kW2bxhYOX7KaD4f2oPz0sXDl8UH5rc/DynroXrFp39ryTct/nQevLoXyC6AsDo0t\n",
+       "wajqEUE8Aqksz5rZZzuwKs4594lKJoEA+wGLN05kJOku4HigUyUQM2uU9Jll63mpXw28uRTGXBtM\n",
+       "+NfQAj99HHbqDb2r4Ow7N/WD/PsDSEahORvsJ8CgOWc8lcpykZkt+MQPds65DlYyneiSTgKOMLOz\n",
+       "wvUvAWPNbGrePiXfib6RpFndKxi3rhF27QsvfAuqkhCZuukqpK4Zsrn/71AjmKL2KB9+xDnXFrpC\n",
+       "J/p2dbI0s/FrG3kgEYNT9obqMpDg3R9BSyZ4aVON9zMzha+ImR3pycM5V+pKqQlrGTAob30QsHTz\n",
+       "nSRdkbc6y8xmtW9YrXJSJkvuwdfg4sOgPAF/eQViUVjfBMAzwNFm1lDcMJ1z2xNJ44Hx7f45pfJF\n",
+       "V1IM+BdwKLAcmAOcmt+J3pmasDaStLY6SbdEDHpVBkOsN7ZgwFAze7fY8Tnntn/b/YRSZpaR9HWC\n",
+       "9v8ocHNnuwNrS8ysu6TFpBi2OrjOqDez2iKH5ZxzrVYyVyBbozNegTjnXLF1hU5055xznYgnEOec\n",
+       "cwXxBOKcc64gnkCcc84VxBOIc865gngCcc45VxBPIM455wriCcQ551xBPIE455wriCcQ55xzBfEE\n",
+       "4pxzriCeQJxzzhXEE4hzzrmCeAJxzjlXEE8gzjnnCuIJxDnnXEE8gTjnnCuIJxDnnHMF8QTinHOu\n",
+       "IJ5AnHPOFcQTiHPOuYJ4AnHOOVcQTyDOOecK4gnEOedcQTyBOOecK4gnEOeccwXxBOKcc64gnkCc\n",
+       "c84VpOAEIukKSUslzQ1fR+Ztu1TSIkkLJU3IKx8jaV647bq88qSku8Py2ZKGFF4l55xzHaE1VyAG\n",
+       "XGNmo8PXowCSRgGnAKOAicD1khQecwMw2cxGACMkTQzLJwOrw/JrgataEVfJkjS+2DG0hsdfXJ05\n",
+       "/s4cO3T++NtLa5uwtIWy44E7zSxtZkuAxcBYSf2AajObE+53G3BCuHwccGu4fD9waCvjKlXjix1A\n",
+       "K40vdgCtNL7YAbTS+GIH0Arjix1AK40vdgClqLUJZKqkVyXdLKlbWNYfWJq3z1JgwBbKl4XlhP++\n",
+       "B2BmGWC9pB6tjM0551w7+sQEImlm2Gex+es4guaoYcBewPvALzogXueccyVCZtb6N5GGAg+Z2e6S\n",
+       "LgEwsyvDbTOAy4F3gKfNbGRYfipwkJl9NdznCjObLSkGvG9mvbfwOa0P1jnnuiAz21KXQ6vECj1Q\n",
+       "Uj8zez9c/RwwL1yeBtwh6RqCpqkRwBwzM0l1ksYCc4DTgF/lHXM6MBs4CXhyS5/ZHj8A55xzhSk4\n",
+       "gQBXSdqL4G6st4FzAMxsgaR7gAVABphimy5zpgC3AOXAdDObEZbfDNwuaRGwGpjUiricc851gDZp\n",
+       "wnLOOdf1lMyT6JJ+LumN8K6uByTV5m3r9A8mSpoYxr9I0reLHQ+ApEGSnpY0X9Lrks4Ly3uEN1C8\n",
+       "KenxvDvstvl30UH1iIYPsz7U2eKX1E3SfeHf/gJJYztL/GEs88PPvSP8f1eysUv6g6SVkubllbVZ\n",
+       "vO193vmY+It73jSzkngBhwORcPlK4MpweRTwChAHhhI8V7LxymkOsF+4PB2YGC5PAa4Pl08B7ipy\n",
+       "3aJh3EPDerwCjCyBn3lfYK9wuQr4FzASuBq4OCz/dmt+Fx1Uj28AfwamheudJn6C55/ODJdjQG1n\n",
+       "iD/8/LeAZLh+N0E/ZsnGDnwWGA3Myytrs3hp5/POx8Rf1PNmh/wHL+AH9TngT+HypcC387bNAPYH\n",
+       "+gFv5JVPAv5v3j5jw+UY8EGR63MAMCNv/RLgkmL/nLcQ54PAYcBCoE9Y1hdYWOjvogNiHgg8ARxM\n",
+       "cCcgnSV+gmTx1hbKSz5+oAfBF47u4f+xh8KTWUnHHp5M80/AbRZvR5x3No9/s20dft4smSaszZxJ\n",
+       "kBlh+3gw8aN4QhvrUDIU3Io9GniB4D/UynDTSqBPuFzI76K9XQtcBOTyyjpL/MOADyT9UdLLkm6U\n",
+       "VEkniN/M1hA8+/UusBxYZ2Yz6QSxb6Yt4y32eafDz5sdmkD08Q8mHpu3z3eAFjO7oyNja2clfaeC\n",
+       "pCqCIWTON7MN+dss+DpSkvFLOgZYZWZz2fKwOiUdP8G3vL0Jmg32BhoIrk4/UqrxS9oRuIDgG3F/\n",
+       "oErSl/L3KdXYP05nizdfsc6brbmNd5uZ2eGftF3SGcBR/OdYWMuAQXnrAwky6LJwefPyjccMBpYr\n",
+       "eDCxNvzGVCyb12EQ//ktoGgkxQmSx+1m9mBYvFJSXzNboWAMs1Vh+bb8Lpa1b+QAfBo4TtJRQBlQ\n",
+       "I+l2Ok/8S4GlZvZiuH4fQdPDik4Q/z7AP8xsNYCkBwiaajtD7Pna4m+lqOedYp43S6YJS8HIvBcB\n",
+       "x5tZc96macAkSQlJw9j0YOIKoC68a0UEDyb+Ne+Y08Plj30wsQP9k2D04aGSEgQdVNOKHBPhz+1m\n",
+       "YIGZ/TJvU/7P73SCvpGN5Vv7u3iQdmZml5nZIDMbRtCW+5SZndaJ4l8BvCdp57DoMGA+QX9Cqce/\n",
+       "ENhfUnn4mYcRPPvVGWLP1xZ/K0U77xT9vNlenVUFdA4tIhjuZG74uj5v22UEdxEsBI7IKx9D8AT8\n",
+       "YuBXeeVJ4J7wPWcDQ0ugfkcSdDouBi4tdjxhTJ8h6Dt4Je/nPpGgg/QJ4E3gcaBbob+LDqzLODbd\n",
+       "hdVp4gf2BF4EXgUeIOhY7xTxAxcTJLx5BHeTxUs5duBOgv6aFoK2/i+3Zbztfd7ZQvxnUuTzpj9I\n",
+       "6JxzriAl04TlnHOuc/EE4pxzriCeQJxzzhXEE4hzzrmCeAJxzjlXEE8gzjnnCuIJxDnnXEE8gTjn\n",
+       "nCvI/wPHlKEPLEczaAAAAABJRU5ErkJggg==\n"
+      ],
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x10b4b26d8>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "labels = clus.labels_\n",
+    "\n",
+    "plt.scatter(x=game_mean, y=game_std, c=labels)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Game clusters\n",
+    "\n",
+    "It looks like most of the games are similar, but as the game attributes (such as the number of users who rated) increase in value, there are fewer high-quality games.  So most games don't get played much, but a few get a lot of players."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "yearpublished           0.108461\n",
+       "minplayers             -0.032701\n",
+       "maxplayers             -0.008335\n",
+       "playingtime             0.048994\n",
+       "minplaytime             0.043985\n",
+       "maxplaytime             0.048994\n",
+       "minage                  0.210049\n",
+       "users_rated             0.112564\n",
+       "average_rating          1.000000\n",
+       "bayes_average_rating    0.231563\n",
+       "total_owners            0.137478\n",
+       "total_traders           0.119452\n",
+       "total_wanters           0.196566\n",
+       "total_wishers           0.171375\n",
+       "total_comments          0.123714\n",
+       "total_weights           0.109691\n",
+       "average_weight          0.351081\n",
+       "Name: average_rating, dtype: float64"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "correlations = numeric.corr()\n",
+    "\n",
+    "correlations[\"average_rating\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Correlations\n",
+    "\n",
+    "The `yearpublished` column is, perhaps surprisingly, positively correlated with `average_rating`, suggesting that more recent games tend to be rated more highly.  Games for older players (`minage` is high) tend to be more highly rated.  The more \"weighty\" a game is (`average_weight` is high), the more highly it tends to be rated."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2.0933969758339361"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "\n",
+    "reg = LinearRegression()\n",
+    "cols.remove(\"average_rating\")\n",
+    "cols.remove(\"bayes_average_rating\")\n",
+    "reg.fit(board_games[cols], board_games[\"average_rating\"])\n",
+    "predictions = reg.predict(board_games[cols])\n",
+    "\n",
+    "numpy.mean((predictions - board_games[\"average_rating\"]) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Linear regression\n",
+    "\n",
+    "The mean squared error is close to the standard deviation of all board game ratings.  This indicates that our model may not have high predictive power.  We'll need to dig more into which games were scored well, and which ones weren't."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}