Queer European MD passionate about IT
srinify преди 7 години
родител
ревизия
a491fc0632
променени са 2 файла, в които са добавени 5516 реда и са изтрити 0 реда
  1. 1950 0
      Mission167Solutions.ipynb
  2. 3566 0
      Mission240Solutions.ipynb

+ 1950 - 0
Mission167Solutions.ipynb

@@ -0,0 +1,1950 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Introduction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "pd.options.display.max_columns = 99\n",
+    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Compute each column's missing value counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "company_country_code          1\n",
+       "company_name                  1\n",
+       "company_permalink             1\n",
+       "company_region                1\n",
+       "investor_region               2\n",
+       "investor_permalink            2\n",
+       "investor_name                 2\n",
+       "funded_quarter                3\n",
+       "funded_at                     3\n",
+       "funded_month                  3\n",
+       "funded_year                   3\n",
+       "funding_round_type            3\n",
+       "company_state_code          492\n",
+       "company_city                533\n",
+       "company_category_code       643\n",
+       "raised_amount_usd          3599\n",
+       "investor_country_code     12001\n",
+       "investor_city             12480\n",
+       "investor_state_code       16809\n",
+       "investor_category_code    50427\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mv_list = []\n",
+    "for chunk in chunk_iter:\n",
+    "    mv_list.append(chunk.isnull().sum())\n",
+    "    \n",
+    "combined_mv_vc = pd.concat(mv_list)\n",
+    "unique_combined_mv_vc = combined_mv_vc.groupby(combined_mv_vc.index).sum()\n",
+    "unique_combined_mv_vc.sort_values()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Total memory footprint for each column (in bytes, summed across all chunks)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "company_permalink         4057788\n",
+       "company_name              3591326\n",
+       "company_category_code     3421104\n",
+       "company_country_code      3172176\n",
+       "company_state_code        3106051\n",
+       "company_region            3411585\n",
+       "company_city              3505926\n",
+       "investor_permalink        4980548\n",
+       "investor_name             3915666\n",
+       "investor_category_code     622424\n",
+       "investor_country_code     2647292\n",
+       "investor_state_code       2476607\n",
+       "investor_region           3396281\n",
+       "investor_city             2885083\n",
+       "funding_round_type        3410707\n",
+       "funded_at                 3542185\n",
+       "funded_month              3383584\n",
+       "funded_quarter            3383584\n",
+       "funded_year                422960\n",
+       "raised_amount_usd          422960\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1')\n",
+    "counter = 0\n",
+    "series_memory_fp = pd.Series()\n",
+    "for chunk in chunk_iter:\n",
+    "    if counter == 0:\n",
+    "        series_memory_fp = chunk.memory_usage(deep=True)\n",
+    "    else:\n",
+    "        series_memory_fp += chunk.memory_usage(deep=True)\n",
+    "    counter += 1\n",
+    "\n",
+    "# Drop memory footprint calculation for the index.\n",
+    "series_memory_fp = series_memory_fp.drop('Index')\n",
+    "series_memory_fp"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Total memory footprint of the data (in megabytes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "56.987607002258301"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "series_memory_fp.sum() / (1024 * 1024)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "company_country_code          1\n",
+       "company_name                  1\n",
+       "company_permalink             1\n",
+       "company_region                1\n",
+       "investor_region               2\n",
+       "investor_permalink            2\n",
+       "investor_name                 2\n",
+       "funded_quarter                3\n",
+       "funded_at                     3\n",
+       "funded_month                  3\n",
+       "funded_year                   3\n",
+       "funding_round_type            3\n",
+       "company_state_code          492\n",
+       "company_city                533\n",
+       "company_category_code       643\n",
+       "raised_amount_usd          3599\n",
+       "investor_country_code     12001\n",
+       "investor_city             12480\n",
+       "investor_state_code       16809\n",
+       "investor_category_code    50427\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "unique_combined_mv_vc.sort_values()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Drop columns representing URLs or containing way too many missing values (>90% missing)\n",
+    "drop_cols = ['investor_permalink', 'company_permalink', 'investor_category_code']\n",
+    "keep_cols = chunk.columns.drop(drop_cols)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['company_name', 'company_category_code', 'company_country_code',\n",
+       "       'company_state_code', 'company_region', 'company_city', 'investor_name',\n",
+       "       'investor_country_code', 'investor_state_code', 'investor_region',\n",
+       "       'investor_city', 'funding_round_type', 'funded_at', 'funded_month',\n",
+       "       'funded_quarter', 'funded_year', 'raised_amount_usd'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "keep_cols.tolist"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Selecting Data Types\n",
+    "\n",
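+    "Let's first determine which columns change type from one chunk to the next. Note that the next cell only lays the groundwork: it records every column's dtype for each chunk, and the cell after it reduces each list to the set of distinct types."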
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Key: Column name, Value: List of types\n",
+    "col_types = {}\n",
+    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1', usecols=keep_cols)\n",
+    "\n",
+    "for chunk in chunk_iter:\n",
+    "    for col in chunk.columns:\n",
+    "        if col not in col_types:\n",
+    "            col_types[col] = [str(chunk.dtypes[col])]\n",
+    "        else:\n",
+    "            col_types[col].append(str(chunk.dtypes[col]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'company_category_code': {'object'},\n",
+       " 'company_city': {'object'},\n",
+       " 'company_country_code': {'object'},\n",
+       " 'company_name': {'object'},\n",
+       " 'company_region': {'object'},\n",
+       " 'company_state_code': {'object'},\n",
+       " 'funded_at': {'object'},\n",
+       " 'funded_month': {'object'},\n",
+       " 'funded_quarter': {'object'},\n",
+       " 'funded_year': {'float64', 'int64'},\n",
+       " 'funding_round_type': {'object'},\n",
+       " 'investor_city': {'float64', 'object'},\n",
+       " 'investor_country_code': {'float64', 'object'},\n",
+       " 'investor_name': {'object'},\n",
+       " 'investor_region': {'object'},\n",
+       " 'investor_state_code': {'float64', 'object'},\n",
+       " 'raised_amount_usd': {'float64'}}"
+      ]
+     },
+     "execution_count": 84,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "uniq_col_types = {}\n",
+    "for k,v in col_types.items():\n",
+    "    uniq_col_types[k] = set(col_types[k])\n",
+    "uniq_col_types"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>company_name</th>\n",
+       "      <th>company_category_code</th>\n",
+       "      <th>company_country_code</th>\n",
+       "      <th>company_state_code</th>\n",
+       "      <th>company_region</th>\n",
+       "      <th>company_city</th>\n",
+       "      <th>investor_name</th>\n",
+       "      <th>investor_country_code</th>\n",
+       "      <th>investor_state_code</th>\n",
+       "      <th>investor_region</th>\n",
+       "      <th>investor_city</th>\n",
+       "      <th>funding_round_type</th>\n",
+       "      <th>funded_at</th>\n",
+       "      <th>funded_month</th>\n",
+       "      <th>funded_quarter</th>\n",
+       "      <th>funded_year</th>\n",
+       "      <th>raised_amount_usd</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>50000</th>\n",
+       "      <td>NuORDER</td>\n",
+       "      <td>fashion</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "      <td>West Hollywood</td>\n",
+       "      <td>Mortimer Singer</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2012-10-01</td>\n",
+       "      <td>2012-10</td>\n",
+       "      <td>2012-Q4</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>3060000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50001</th>\n",
+       "      <td>ChaCha</td>\n",
+       "      <td>advertising</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>IN</td>\n",
+       "      <td>Indianapolis</td>\n",
+       "      <td>Carmel</td>\n",
+       "      <td>Morton Meyerson</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-b</td>\n",
+       "      <td>2007-10-01</td>\n",
+       "      <td>2007-10</td>\n",
+       "      <td>2007-Q4</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>12000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50002</th>\n",
+       "      <td>Binfire</td>\n",
+       "      <td>software</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>FL</td>\n",
+       "      <td>Bocat Raton</td>\n",
+       "      <td>Bocat Raton</td>\n",
+       "      <td>Moshe Ariel</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2008-04-18</td>\n",
+       "      <td>2008-04</td>\n",
+       "      <td>2008-Q2</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>500000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50003</th>\n",
+       "      <td>Binfire</td>\n",
+       "      <td>software</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>FL</td>\n",
+       "      <td>Bocat Raton</td>\n",
+       "      <td>Bocat Raton</td>\n",
+       "      <td>Moshe Ariel</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2010-01-01</td>\n",
+       "      <td>2010-01</td>\n",
+       "      <td>2010-Q1</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>750000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50004</th>\n",
+       "      <td>Unified Color</td>\n",
+       "      <td>software</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>South San Frnacisco</td>\n",
+       "      <td>Mr. Andrew Oung</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2010-01-01</td>\n",
+       "      <td>2010-01</td>\n",
+       "      <td>2010-Q1</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50005</th>\n",
+       "      <td>HItviews</td>\n",
+       "      <td>advertising</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York City</td>\n",
+       "      <td>multiple parties</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2007-11-29</td>\n",
+       "      <td>2007-11</td>\n",
+       "      <td>2007-Q4</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>485000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50006</th>\n",
+       "      <td>LockerDome</td>\n",
+       "      <td>social</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>MO</td>\n",
+       "      <td>Saint Louis</td>\n",
+       "      <td>St. Louis</td>\n",
+       "      <td>multiple parties</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-04-17</td>\n",
+       "      <td>2012-04</td>\n",
+       "      <td>2012-Q2</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>300000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50007</th>\n",
+       "      <td>ThirdLove</td>\n",
+       "      <td>ecommerce</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Munjal Shah</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2012-12-01</td>\n",
+       "      <td>2012-12</td>\n",
+       "      <td>2012-Q4</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>5600000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50008</th>\n",
+       "      <td>Hakia</td>\n",
+       "      <td>search</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>TBD</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Murat Vargi</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2006-11-01</td>\n",
+       "      <td>2006-11</td>\n",
+       "      <td>2006-Q4</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>16000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50009</th>\n",
+       "      <td>bookacoach</td>\n",
+       "      <td>sports</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>IN</td>\n",
+       "      <td>Indianapolis</td>\n",
+       "      <td>Indianapolis</td>\n",
+       "      <td>Myles Grote</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-11-01</td>\n",
+       "      <td>2012-11</td>\n",
+       "      <td>2012-Q4</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50010</th>\n",
+       "      <td>LocalCircles</td>\n",
+       "      <td>social</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Santa Clara</td>\n",
+       "      <td>Nadir Godrej</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-09-01</td>\n",
+       "      <td>2012-09</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50011</th>\n",
+       "      <td>Graphdive</td>\n",
+       "      <td>analytics</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Menlo Park</td>\n",
+       "      <td>Naguib Sawiris</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-10-04</td>\n",
+       "      <td>2012-10</td>\n",
+       "      <td>2012-Q4</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>1000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50012</th>\n",
+       "      <td>Ribbon</td>\n",
+       "      <td>ecommerce</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Naguib Sawiris</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2013-02-05</td>\n",
+       "      <td>2013-02</td>\n",
+       "      <td>2013-Q1</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>1630000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50013</th>\n",
+       "      <td>Dokkankom.com</td>\n",
+       "      <td>ecommerce</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>new york</td>\n",
+       "      <td>Namek Zu'bi</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2011-10-10</td>\n",
+       "      <td>2011-10</td>\n",
+       "      <td>2011-Q4</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>30000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50014</th>\n",
+       "      <td>Lookery</td>\n",
+       "      <td>web</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Nana Shin</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2008-02-07</td>\n",
+       "      <td>2008-02</td>\n",
+       "      <td>2008-Q1</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>900000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50015</th>\n",
+       "      <td>TrustDegrees</td>\n",
+       "      <td>web</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>Kenmore</td>\n",
+       "      <td>Kenmore</td>\n",
+       "      <td>Nancy Barrett</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2011-06-09</td>\n",
+       "      <td>2011-06</td>\n",
+       "      <td>2011-Q2</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>8000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50016</th>\n",
+       "      <td>Altavoz</td>\n",
+       "      <td>games_video</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>DC</td>\n",
+       "      <td>Washington DC</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>Nancy Jacobsen</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-09-11</td>\n",
+       "      <td>2012-09</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>150000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50017</th>\n",
+       "      <td>EdSurge</td>\n",
+       "      <td>education</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Burlingame</td>\n",
+       "      <td>Nancy Peretsman</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-08-29</td>\n",
+       "      <td>2012-08</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>400000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50018</th>\n",
+       "      <td>FullContact</td>\n",
+       "      <td>enterprise</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CO</td>\n",
+       "      <td>Denver</td>\n",
+       "      <td>Denver</td>\n",
+       "      <td>Nancy Pierce</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-b</td>\n",
+       "      <td>2012-07-09</td>\n",
+       "      <td>2012-07</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>7000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50019</th>\n",
+       "      <td>Rapt Media</td>\n",
+       "      <td>enterprise</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CO</td>\n",
+       "      <td>Denver</td>\n",
+       "      <td>Boulder</td>\n",
+       "      <td>Nancy Pierce</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2013-01-23</td>\n",
+       "      <td>2013-01</td>\n",
+       "      <td>2013-Q1</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>2288803.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50020</th>\n",
+       "      <td>Humanoid</td>\n",
+       "      <td>software</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Nat Friedman</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2010-12-01</td>\n",
+       "      <td>2010-12</td>\n",
+       "      <td>2010-Q4</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>1100000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50021</th>\n",
+       "      <td>Runscope</td>\n",
+       "      <td>web</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Nat Friedman</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2013-05-22</td>\n",
+       "      <td>2013-05</td>\n",
+       "      <td>2013-Q2</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>1100000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50022</th>\n",
+       "      <td>Adzerk</td>\n",
+       "      <td>advertising</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NC</td>\n",
+       "      <td>Raleigh-Durham</td>\n",
+       "      <td>Durham</td>\n",
+       "      <td>Nat Turner</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2011-07-12</td>\n",
+       "      <td>2011-07</td>\n",
+       "      <td>2011-Q3</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>650000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50023</th>\n",
+       "      <td>Adaptly</td>\n",
+       "      <td>advertising</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Nat Turner</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2011-04-18</td>\n",
+       "      <td>2011-04</td>\n",
+       "      <td>2011-Q2</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>2000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50024</th>\n",
+       "      <td>Lore</td>\n",
+       "      <td>education</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Nat Turner</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2011-06-27</td>\n",
+       "      <td>2011-06</td>\n",
+       "      <td>2011-Q2</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>1000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50025</th>\n",
+       "      <td>Tasted Menu</td>\n",
+       "      <td>hospitality</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>MA</td>\n",
+       "      <td>Boston</td>\n",
+       "      <td>Boston</td>\n",
+       "      <td>Nat Turner</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2011-05-01</td>\n",
+       "      <td>2011-05</td>\n",
+       "      <td>2011-Q2</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50026</th>\n",
+       "      <td>Lua Technologies</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Nat Turner</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2012-08-01</td>\n",
+       "      <td>2012-08</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>2500000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50027</th>\n",
+       "      <td>Blue Apron</td>\n",
+       "      <td>hospitality</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Brooklyn</td>\n",
+       "      <td>Nat Turner</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2013-02-19</td>\n",
+       "      <td>2013-02</td>\n",
+       "      <td>2013-Q1</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>3000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50028</th>\n",
+       "      <td>ChatID</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Nat Turner</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-01-01</td>\n",
+       "      <td>2012-01</td>\n",
+       "      <td>2012-Q1</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50029</th>\n",
+       "      <td>Breakthrough Behavioral</td>\n",
+       "      <td>health</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Redwood City</td>\n",
+       "      <td>Nat Turner</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-08-13</td>\n",
+       "      <td>2012-08</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>900000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52840</th>\n",
+       "      <td>Meddik</td>\n",
+       "      <td>health</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Zach Weinberg</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-05-24</td>\n",
+       "      <td>2012-05</td>\n",
+       "      <td>2012-Q2</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>750000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52841</th>\n",
+       "      <td>Blue Apron</td>\n",
+       "      <td>hospitality</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Brooklyn</td>\n",
+       "      <td>Zach Weinberg</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2013-02-19</td>\n",
+       "      <td>2013-02</td>\n",
+       "      <td>2013-Q1</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>3000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52842</th>\n",
+       "      <td>ChatID</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Zach Weinberg</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-01-01</td>\n",
+       "      <td>2012-01</td>\n",
+       "      <td>2012-Q1</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52843</th>\n",
+       "      <td>Breakthrough Behavioral</td>\n",
+       "      <td>health</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Redwood City</td>\n",
+       "      <td>Zach Weinberg</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-08-13</td>\n",
+       "      <td>2012-08</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>900000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52844</th>\n",
+       "      <td>Plaid</td>\n",
+       "      <td>software</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Zach Weinberg</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2013-09-19</td>\n",
+       "      <td>2013-09</td>\n",
+       "      <td>2013-Q3</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>2800000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52845</th>\n",
+       "      <td>PokitDok</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Menlo Park</td>\n",
+       "      <td>Zach Zeitlin</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-07-12</td>\n",
+       "      <td>2012-07</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>1300000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52846</th>\n",
+       "      <td>Fitocracy</td>\n",
+       "      <td>web</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Zachary Aarons</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2011-09-01</td>\n",
+       "      <td>2011-09</td>\n",
+       "      <td>2011-Q3</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>250000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52847</th>\n",
+       "      <td>Square</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Zachary Bogue</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2009-11-01</td>\n",
+       "      <td>2009-11</td>\n",
+       "      <td>2009-Q4</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>10000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52848</th>\n",
+       "      <td>MixRank</td>\n",
+       "      <td>advertising</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Zachary Bogue</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2011-11-18</td>\n",
+       "      <td>2011-11</td>\n",
+       "      <td>2011-Q4</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>1500000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52849</th>\n",
+       "      <td>Socialcam</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>Santa Clara County</td>\n",
+       "      <td>Santa Clara County</td>\n",
+       "      <td>Zachary Bogue</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-04-30</td>\n",
+       "      <td>2012-04</td>\n",
+       "      <td>2012-Q2</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52850</th>\n",
+       "      <td>Nuzzel</td>\n",
+       "      <td>news</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Zachary Bogue</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>venture</td>\n",
+       "      <td>2012-11-15</td>\n",
+       "      <td>2012-11</td>\n",
+       "      <td>2012-Q4</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>1700000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52851</th>\n",
+       "      <td>ThirdLove</td>\n",
+       "      <td>ecommerce</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Zachary Bogue</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2012-12-01</td>\n",
+       "      <td>2012-12</td>\n",
+       "      <td>2012-Q4</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>5600000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52852</th>\n",
+       "      <td>MXD3D</td>\n",
+       "      <td>web</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Zaid Ayoub</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-01-01</td>\n",
+       "      <td>2012-01</td>\n",
+       "      <td>2012-Q1</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>300000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52853</th>\n",
+       "      <td>MXD3D</td>\n",
+       "      <td>web</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Zaid Ayoub</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2011-01-01</td>\n",
+       "      <td>2011-01</td>\n",
+       "      <td>2011-Q1</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>300000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52854</th>\n",
+       "      <td>Verious</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Carlos</td>\n",
+       "      <td>Zain Khan</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2011-05-30</td>\n",
+       "      <td>2011-05</td>\n",
+       "      <td>2011-Q2</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>800000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52855</th>\n",
+       "      <td>Identified</td>\n",
+       "      <td>analytics</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Zao Yang</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-b</td>\n",
+       "      <td>2012-06-05</td>\n",
+       "      <td>2012-06</td>\n",
+       "      <td>2012-Q2</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>21000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52856</th>\n",
+       "      <td>HaulerDeals</td>\n",
+       "      <td>fashion</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "      <td>Zaw Thet</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2012-10-31</td>\n",
+       "      <td>2012-10</td>\n",
+       "      <td>2012-Q4</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>1250000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52857</th>\n",
+       "      <td>When You Wish</td>\n",
+       "      <td>nonprofit</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "      <td>Marina Del Rey</td>\n",
+       "      <td>Zelda Marzec</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2011-02-01</td>\n",
+       "      <td>2011-02</td>\n",
+       "      <td>2011-Q1</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>1500000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52858</th>\n",
+       "      <td>Farmeron</td>\n",
+       "      <td>analytics</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Mountain View</td>\n",
+       "      <td>Zeljko Mataija</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2010-10-01</td>\n",
+       "      <td>2010-10</td>\n",
+       "      <td>2010-Q4</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>15000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52859</th>\n",
+       "      <td>Theraclone Sciences</td>\n",
+       "      <td>biotech</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>WA</td>\n",
+       "      <td>Seattle</td>\n",
+       "      <td>Seattle</td>\n",
+       "      <td>Zenyaku Kogyo</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-b</td>\n",
+       "      <td>2013-03-25</td>\n",
+       "      <td>2013-03</td>\n",
+       "      <td>2013-Q1</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>8000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52860</th>\n",
+       "      <td>SimpleGeo</td>\n",
+       "      <td>advertising</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>Ziv Navoth</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>other</td>\n",
+       "      <td>2009-11-10</td>\n",
+       "      <td>2009-11</td>\n",
+       "      <td>2009-Q4</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>195000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52861</th>\n",
+       "      <td>Open Me</td>\n",
+       "      <td>ecommerce</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "      <td>Ziver Birg</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2013-08-01</td>\n",
+       "      <td>2013-08</td>\n",
+       "      <td>2013-Q3</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52862</th>\n",
+       "      <td>Comprehend Systems</td>\n",
+       "      <td>enterprise</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Palo Alto</td>\n",
+       "      <td>Zod Nazem</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2013-07-11</td>\n",
+       "      <td>2013-07</td>\n",
+       "      <td>2013-Q3</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>8400000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52863</th>\n",
+       "      <td>Payoneer</td>\n",
+       "      <td>other</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>Zohar Gilon</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2005-01-01</td>\n",
+       "      <td>2005-01</td>\n",
+       "      <td>2005-Q1</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>2000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52864</th>\n",
+       "      <td>Outbrain</td>\n",
+       "      <td>web</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>NY</td>\n",
+       "      <td>New York</td>\n",
+       "      <td>New York City</td>\n",
+       "      <td>Zohar Gilon</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-b</td>\n",
+       "      <td>2009-02-11</td>\n",
+       "      <td>2009-02</td>\n",
+       "      <td>2009-Q1</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>12000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52865</th>\n",
+       "      <td>Garantia Data</td>\n",
+       "      <td>enterprise</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Santa Clara</td>\n",
+       "      <td>Zohar Gilon</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2012-08-08</td>\n",
+       "      <td>2012-08</td>\n",
+       "      <td>2012-Q3</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>3800000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52866</th>\n",
+       "      <td>DudaMobile</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Palo Alto</td>\n",
+       "      <td>Zohar Gilon</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-c+</td>\n",
+       "      <td>2013-04-08</td>\n",
+       "      <td>2013-04</td>\n",
+       "      <td>2013-Q2</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>10300000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52867</th>\n",
+       "      <td>SiteBrains</td>\n",
+       "      <td>software</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>San Francisco</td>\n",
+       "      <td>zohar israel</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>angel</td>\n",
+       "      <td>2010-08-01</td>\n",
+       "      <td>2010-08</td>\n",
+       "      <td>2010-Q3</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>350000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52868</th>\n",
+       "      <td>Comprehend Systems</td>\n",
+       "      <td>enterprise</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>CA</td>\n",
+       "      <td>SF Bay</td>\n",
+       "      <td>Palo Alto</td>\n",
+       "      <td>Zorba Lieberman</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2013-07-11</td>\n",
+       "      <td>2013-07</td>\n",
+       "      <td>2013-Q3</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>8400000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52869</th>\n",
+       "      <td>SmartThings</td>\n",
+       "      <td>mobile</td>\n",
+       "      <td>USA</td>\n",
+       "      <td>DC</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>Minneapolis</td>\n",
+       "      <td>Zorik Gordon</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>series-a</td>\n",
+       "      <td>2012-12-04</td>\n",
+       "      <td>2012-12</td>\n",
+       "      <td>2012-Q4</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>3000000.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2870 rows × 17 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  company_name company_category_code company_country_code  \\\n",
+       "50000                  NuORDER               fashion                  USA   \n",
+       "50001                   ChaCha           advertising                  USA   \n",
+       "50002                  Binfire              software                  USA   \n",
+       "50003                  Binfire              software                  USA   \n",
+       "50004            Unified Color              software                  USA   \n",
+       "50005                 HItviews           advertising                  USA   \n",
+       "50006               LockerDome                social                  USA   \n",
+       "50007                ThirdLove             ecommerce                  USA   \n",
+       "50008                    Hakia                search                  USA   \n",
+       "50009               bookacoach                sports                  USA   \n",
+       "50010             LocalCircles                social                  USA   \n",
+       "50011                Graphdive             analytics                  USA   \n",
+       "50012                   Ribbon             ecommerce                  USA   \n",
+       "50013            Dokkankom.com             ecommerce                  USA   \n",
+       "50014                  Lookery                   web                  USA   \n",
+       "50015             TrustDegrees                   web                  USA   \n",
+       "50016                  Altavoz           games_video                  USA   \n",
+       "50017                  EdSurge             education                  USA   \n",
+       "50018              FullContact            enterprise                  USA   \n",
+       "50019               Rapt Media            enterprise                  USA   \n",
+       "50020                 Humanoid              software                  USA   \n",
+       "50021                 Runscope                   web                  USA   \n",
+       "50022                   Adzerk           advertising                  USA   \n",
+       "50023                  Adaptly           advertising                  USA   \n",
+       "50024                     Lore             education                  USA   \n",
+       "50025              Tasted Menu           hospitality                  USA   \n",
+       "50026         Lua Technologies                mobile                  USA   \n",
+       "50027               Blue Apron           hospitality                  USA   \n",
+       "50028                   ChatID                mobile                  USA   \n",
+       "50029  Breakthrough Behavioral                health                  USA   \n",
+       "...                        ...                   ...                  ...   \n",
+       "52840                   Meddik                health                  USA   \n",
+       "52841               Blue Apron           hospitality                  USA   \n",
+       "52842                   ChatID                mobile                  USA   \n",
+       "52843  Breakthrough Behavioral                health                  USA   \n",
+       "52844                    Plaid              software                  USA   \n",
+       "52845                 PokitDok                mobile                  USA   \n",
+       "52846                Fitocracy                   web                  USA   \n",
+       "52847                   Square                mobile                  USA   \n",
+       "52848                  MixRank           advertising                  USA   \n",
+       "52849                Socialcam                mobile                  USA   \n",
+       "52850                   Nuzzel                  news                  USA   \n",
+       "52851                ThirdLove             ecommerce                  USA   \n",
+       "52852                    MXD3D                   web                  USA   \n",
+       "52853                    MXD3D                   web                  USA   \n",
+       "52854                  Verious                mobile                  USA   \n",
+       "52855               Identified             analytics                  USA   \n",
+       "52856              HaulerDeals               fashion                  USA   \n",
+       "52857            When You Wish             nonprofit                  USA   \n",
+       "52858                 Farmeron             analytics                  USA   \n",
+       "52859      Theraclone Sciences               biotech                  USA   \n",
+       "52860                SimpleGeo           advertising                  USA   \n",
+       "52861                  Open Me             ecommerce                  USA   \n",
+       "52862       Comprehend Systems            enterprise                  USA   \n",
+       "52863                 Payoneer                 other                  USA   \n",
+       "52864                 Outbrain                   web                  USA   \n",
+       "52865            Garantia Data            enterprise                  USA   \n",
+       "52866               DudaMobile                mobile                  USA   \n",
+       "52867               SiteBrains              software                  USA   \n",
+       "52868       Comprehend Systems            enterprise                  USA   \n",
+       "52869              SmartThings                mobile                  USA   \n",
+       "\n",
+       "      company_state_code      company_region         company_city  \\\n",
+       "50000                 CA         Los Angeles       West Hollywood   \n",
+       "50001                 IN        Indianapolis               Carmel   \n",
+       "50002                 FL         Bocat Raton          Bocat Raton   \n",
+       "50003                 FL         Bocat Raton          Bocat Raton   \n",
+       "50004                 CA              SF Bay  South San Frnacisco   \n",
+       "50005                 NY            New York        New York City   \n",
+       "50006                 MO         Saint Louis            St. Louis   \n",
+       "50007                 CA              SF Bay        San Francisco   \n",
+       "50008                NaN                 TBD                  NaN   \n",
+       "50009                 IN        Indianapolis         Indianapolis   \n",
+       "50010                 CA              SF Bay          Santa Clara   \n",
+       "50011                 CA              SF Bay           Menlo Park   \n",
+       "50012                 CA              SF Bay        San Francisco   \n",
+       "50013                 NY            New York             new york   \n",
+       "50014                 CA              SF Bay        San Francisco   \n",
+       "50015                 NY             Kenmore              Kenmore   \n",
+       "50016                 DC       Washington DC           Washington   \n",
+       "50017                 CA              SF Bay           Burlingame   \n",
+       "50018                 CO              Denver               Denver   \n",
+       "50019                 CO              Denver              Boulder   \n",
+       "50020                 CA              SF Bay        San Francisco   \n",
+       "50021                 CA              SF Bay        San Francisco   \n",
+       "50022                 NC      Raleigh-Durham               Durham   \n",
+       "50023                 NY            New York             New York   \n",
+       "50024                 NY            New York             New York   \n",
+       "50025                 MA              Boston               Boston   \n",
+       "50026                 NY            New York             New York   \n",
+       "50027                 NY            New York             Brooklyn   \n",
+       "50028                 NY            New York             New York   \n",
+       "50029                 CA              SF Bay         Redwood City   \n",
+       "...                  ...                 ...                  ...   \n",
+       "52840                 NY            New York             New York   \n",
+       "52841                 NY            New York             Brooklyn   \n",
+       "52842                 NY            New York             New York   \n",
+       "52843                 CA              SF Bay         Redwood City   \n",
+       "52844                 CA              SF Bay        San Francisco   \n",
+       "52845                 CA              SF Bay           Menlo Park   \n",
+       "52846                 NY            New York             New York   \n",
+       "52847                 CA              SF Bay        San Francisco   \n",
+       "52848                 CA              SF Bay        San Francisco   \n",
+       "52849                 CA  Santa Clara County   Santa Clara County   \n",
+       "52850                 CA              SF Bay        San Francisco   \n",
+       "52851                 CA              SF Bay        San Francisco   \n",
+       "52852                 CA              SF Bay        San Francisco   \n",
+       "52853                 CA              SF Bay        San Francisco   \n",
+       "52854                 CA              SF Bay           San Carlos   \n",
+       "52855                 CA              SF Bay        San Francisco   \n",
+       "52856                 CA         Los Angeles          Los Angeles   \n",
+       "52857                 CA         Los Angeles       Marina Del Rey   \n",
+       "52858                 CA              SF Bay        Mountain View   \n",
+       "52859                 WA             Seattle              Seattle   \n",
+       "52860                 CA              SF Bay        San Francisco   \n",
+       "52861                 CA         Los Angeles          Los Angeles   \n",
+       "52862                 CA              SF Bay            Palo Alto   \n",
+       "52863                 NY            New York             New York   \n",
+       "52864                 NY            New York        New York City   \n",
+       "52865                 CA              SF Bay          Santa Clara   \n",
+       "52866                 CA              SF Bay            Palo Alto   \n",
+       "52867                 CA              SF Bay        San Francisco   \n",
+       "52868                 CA              SF Bay            Palo Alto   \n",
+       "52869                 DC             unknown          Minneapolis   \n",
+       "\n",
+       "          investor_name  investor_country_code  investor_state_code  \\\n",
+       "50000   Mortimer Singer                    NaN                  NaN   \n",
+       "50001   Morton Meyerson                    NaN                  NaN   \n",
+       "50002       Moshe Ariel                    NaN                  NaN   \n",
+       "50003       Moshe Ariel                    NaN                  NaN   \n",
+       "50004   Mr. Andrew Oung                    NaN                  NaN   \n",
+       "50005  multiple parties                    NaN                  NaN   \n",
+       "50006  multiple parties                    NaN                  NaN   \n",
+       "50007       Munjal Shah                    NaN                  NaN   \n",
+       "50008       Murat Vargi                    NaN                  NaN   \n",
+       "50009       Myles Grote                    NaN                  NaN   \n",
+       "50010      Nadir Godrej                    NaN                  NaN   \n",
+       "50011    Naguib Sawiris                    NaN                  NaN   \n",
+       "50012    Naguib Sawiris                    NaN                  NaN   \n",
+       "50013       Namek Zu'bi                    NaN                  NaN   \n",
+       "50014         Nana Shin                    NaN                  NaN   \n",
+       "50015     Nancy Barrett                    NaN                  NaN   \n",
+       "50016    Nancy Jacobsen                    NaN                  NaN   \n",
+       "50017   Nancy Peretsman                    NaN                  NaN   \n",
+       "50018      Nancy Pierce                    NaN                  NaN   \n",
+       "50019      Nancy Pierce                    NaN                  NaN   \n",
+       "50020      Nat Friedman                    NaN                  NaN   \n",
+       "50021      Nat Friedman                    NaN                  NaN   \n",
+       "50022        Nat Turner                    NaN                  NaN   \n",
+       "50023        Nat Turner                    NaN                  NaN   \n",
+       "50024        Nat Turner                    NaN                  NaN   \n",
+       "50025        Nat Turner                    NaN                  NaN   \n",
+       "50026        Nat Turner                    NaN                  NaN   \n",
+       "50027        Nat Turner                    NaN                  NaN   \n",
+       "50028        Nat Turner                    NaN                  NaN   \n",
+       "50029        Nat Turner                    NaN                  NaN   \n",
+       "...                 ...                    ...                  ...   \n",
+       "52840     Zach Weinberg                    NaN                  NaN   \n",
+       "52841     Zach Weinberg                    NaN                  NaN   \n",
+       "52842     Zach Weinberg                    NaN                  NaN   \n",
+       "52843     Zach Weinberg                    NaN                  NaN   \n",
+       "52844     Zach Weinberg                    NaN                  NaN   \n",
+       "52845      Zach Zeitlin                    NaN                  NaN   \n",
+       "52846    Zachary Aarons                    NaN                  NaN   \n",
+       "52847     Zachary Bogue                    NaN                  NaN   \n",
+       "52848     Zachary Bogue                    NaN                  NaN   \n",
+       "52849     Zachary Bogue                    NaN                  NaN   \n",
+       "52850     Zachary Bogue                    NaN                  NaN   \n",
+       "52851     Zachary Bogue                    NaN                  NaN   \n",
+       "52852        Zaid Ayoub                    NaN                  NaN   \n",
+       "52853        Zaid Ayoub                    NaN                  NaN   \n",
+       "52854         Zain Khan                    NaN                  NaN   \n",
+       "52855          Zao Yang                    NaN                  NaN   \n",
+       "52856          Zaw Thet                    NaN                  NaN   \n",
+       "52857      Zelda Marzec                    NaN                  NaN   \n",
+       "52858    Zeljko Mataija                    NaN                  NaN   \n",
+       "52859     Zenyaku Kogyo                    NaN                  NaN   \n",
+       "52860        Ziv Navoth                    NaN                  NaN   \n",
+       "52861        Ziver Birg                    NaN                  NaN   \n",
+       "52862         Zod Nazem                    NaN                  NaN   \n",
+       "52863       Zohar Gilon                    NaN                  NaN   \n",
+       "52864       Zohar Gilon                    NaN                  NaN   \n",
+       "52865       Zohar Gilon                    NaN                  NaN   \n",
+       "52866       Zohar Gilon                    NaN                  NaN   \n",
+       "52867      zohar israel                    NaN                  NaN   \n",
+       "52868   Zorba Lieberman                    NaN                  NaN   \n",
+       "52869      Zorik Gordon                    NaN                  NaN   \n",
+       "\n",
+       "      investor_region  investor_city funding_round_type   funded_at  \\\n",
+       "50000         unknown            NaN           series-a  2012-10-01   \n",
+       "50001         unknown            NaN           series-b  2007-10-01   \n",
+       "50002         unknown            NaN              angel  2008-04-18   \n",
+       "50003         unknown            NaN              angel  2010-01-01   \n",
+       "50004         unknown            NaN              angel  2010-01-01   \n",
+       "50005         unknown            NaN              angel  2007-11-29   \n",
+       "50006         unknown            NaN              angel  2012-04-17   \n",
+       "50007         unknown            NaN           series-a  2012-12-01   \n",
+       "50008         unknown            NaN           series-a  2006-11-01   \n",
+       "50009         unknown            NaN              angel  2012-11-01   \n",
+       "50010         unknown            NaN              angel  2012-09-01   \n",
+       "50011         unknown            NaN              angel  2012-10-04   \n",
+       "50012         unknown            NaN           series-a  2013-02-05   \n",
+       "50013         unknown            NaN              angel  2011-10-10   \n",
+       "50014         unknown            NaN              angel  2008-02-07   \n",
+       "50015         unknown            NaN              angel  2011-06-09   \n",
+       "50016         unknown            NaN              angel  2012-09-11   \n",
+       "50017         unknown            NaN              angel  2012-08-29   \n",
+       "50018         unknown            NaN           series-b  2012-07-09   \n",
+       "50019         unknown            NaN           series-a  2013-01-23   \n",
+       "50020         unknown            NaN              angel  2010-12-01   \n",
+       "50021         unknown            NaN              angel  2013-05-22   \n",
+       "50022         unknown            NaN              angel  2011-07-12   \n",
+       "50023         unknown            NaN           series-a  2011-04-18   \n",
+       "50024         unknown            NaN              angel  2011-06-27   \n",
+       "50025         unknown            NaN              angel  2011-05-01   \n",
+       "50026         unknown            NaN           series-a  2012-08-01   \n",
+       "50027         unknown            NaN           series-a  2013-02-19   \n",
+       "50028         unknown            NaN              angel  2012-01-01   \n",
+       "50029         unknown            NaN              angel  2012-08-13   \n",
+       "...               ...            ...                ...         ...   \n",
+       "52840         unknown            NaN              angel  2012-05-24   \n",
+       "52841         unknown            NaN           series-a  2013-02-19   \n",
+       "52842         unknown            NaN              angel  2012-01-01   \n",
+       "52843         unknown            NaN              angel  2012-08-13   \n",
+       "52844         unknown            NaN           series-a  2013-09-19   \n",
+       "52845         unknown            NaN              angel  2012-07-12   \n",
+       "52846         unknown            NaN              angel  2011-09-01   \n",
+       "52847         unknown            NaN           series-a  2009-11-01   \n",
+       "52848         unknown            NaN           series-a  2011-11-18   \n",
+       "52849         unknown            NaN              angel  2012-04-30   \n",
+       "52850         unknown            NaN            venture  2012-11-15   \n",
+       "52851         unknown            NaN           series-a  2012-12-01   \n",
+       "52852         unknown            NaN              angel  2012-01-01   \n",
+       "52853         unknown            NaN              angel  2011-01-01   \n",
+       "52854         unknown            NaN              angel  2011-05-30   \n",
+       "52855         unknown            NaN           series-b  2012-06-05   \n",
+       "52856         unknown            NaN              angel  2012-10-31   \n",
+       "52857         unknown            NaN           series-a  2011-02-01   \n",
+       "52858         unknown            NaN              angel  2010-10-01   \n",
+       "52859         unknown            NaN           series-b  2013-03-25   \n",
+       "52860         unknown            NaN              other  2009-11-10   \n",
+       "52861         unknown            NaN              angel  2013-08-01   \n",
+       "52862         unknown            NaN           series-a  2013-07-11   \n",
+       "52863         unknown            NaN           series-a  2005-01-01   \n",
+       "52864         unknown            NaN           series-b  2009-02-11   \n",
+       "52865         unknown            NaN           series-a  2012-08-08   \n",
+       "52866         unknown            NaN          series-c+  2013-04-08   \n",
+       "52867         unknown            NaN              angel  2010-08-01   \n",
+       "52868         unknown            NaN           series-a  2013-07-11   \n",
+       "52869         unknown            NaN           series-a  2012-12-04   \n",
+       "\n",
+       "      funded_month funded_quarter  funded_year  raised_amount_usd  \n",
+       "50000      2012-10        2012-Q4         2012          3060000.0  \n",
+       "50001      2007-10        2007-Q4         2007         12000000.0  \n",
+       "50002      2008-04        2008-Q2         2008           500000.0  \n",
+       "50003      2010-01        2010-Q1         2010           750000.0  \n",
+       "50004      2010-01        2010-Q1         2010                NaN  \n",
+       "50005      2007-11        2007-Q4         2007           485000.0  \n",
+       "50006      2012-04        2012-Q2         2012           300000.0  \n",
+       "50007      2012-12        2012-Q4         2012          5600000.0  \n",
+       "50008      2006-11        2006-Q4         2006         16000000.0  \n",
+       "50009      2012-11        2012-Q4         2012                NaN  \n",
+       "50010      2012-09        2012-Q3         2012                NaN  \n",
+       "50011      2012-10        2012-Q4         2012          1000000.0  \n",
+       "50012      2013-02        2013-Q1         2013          1630000.0  \n",
+       "50013      2011-10        2011-Q4         2011            30000.0  \n",
+       "50014      2008-02        2008-Q1         2008           900000.0  \n",
+       "50015      2011-06        2011-Q2         2011             8000.0  \n",
+       "50016      2012-09        2012-Q3         2012           150000.0  \n",
+       "50017      2012-08        2012-Q3         2012           400000.0  \n",
+       "50018      2012-07        2012-Q3         2012          7000000.0  \n",
+       "50019      2013-01        2013-Q1         2013          2288803.0  \n",
+       "50020      2010-12        2010-Q4         2010          1100000.0  \n",
+       "50021      2013-05        2013-Q2         2013          1100000.0  \n",
+       "50022      2011-07        2011-Q3         2011           650000.0  \n",
+       "50023      2011-04        2011-Q2         2011          2000000.0  \n",
+       "50024      2011-06        2011-Q2         2011          1000000.0  \n",
+       "50025      2011-05        2011-Q2         2011                NaN  \n",
+       "50026      2012-08        2012-Q3         2012          2500000.0  \n",
+       "50027      2013-02        2013-Q1         2013          3000000.0  \n",
+       "50028      2012-01        2012-Q1         2012                NaN  \n",
+       "50029      2012-08        2012-Q3         2012           900000.0  \n",
+       "...            ...            ...          ...                ...  \n",
+       "52840      2012-05        2012-Q2         2012           750000.0  \n",
+       "52841      2013-02        2013-Q1         2013          3000000.0  \n",
+       "52842      2012-01        2012-Q1         2012                NaN  \n",
+       "52843      2012-08        2012-Q3         2012           900000.0  \n",
+       "52844      2013-09        2013-Q3         2013          2800000.0  \n",
+       "52845      2012-07        2012-Q3         2012          1300000.0  \n",
+       "52846      2011-09        2011-Q3         2011           250000.0  \n",
+       "52847      2009-11        2009-Q4         2009         10000000.0  \n",
+       "52848      2011-11        2011-Q4         2011          1500000.0  \n",
+       "52849      2012-04        2012-Q2         2012                NaN  \n",
+       "52850      2012-11        2012-Q4         2012          1700000.0  \n",
+       "52851      2012-12        2012-Q4         2012          5600000.0  \n",
+       "52852      2012-01        2012-Q1         2012           300000.0  \n",
+       "52853      2011-01        2011-Q1         2011           300000.0  \n",
+       "52854      2011-05        2011-Q2         2011           800000.0  \n",
+       "52855      2012-06        2012-Q2         2012         21000000.0  \n",
+       "52856      2012-10        2012-Q4         2012          1250000.0  \n",
+       "52857      2011-02        2011-Q1         2011          1500000.0  \n",
+       "52858      2010-10        2010-Q4         2010            15000.0  \n",
+       "52859      2013-03        2013-Q1         2013          8000000.0  \n",
+       "52860      2009-11        2009-Q4         2009           195000.0  \n",
+       "52861      2013-08        2013-Q3         2013                NaN  \n",
+       "52862      2013-07        2013-Q3         2013          8400000.0  \n",
+       "52863      2005-01        2005-Q1         2005          2000000.0  \n",
+       "52864      2009-02        2009-Q1         2009         12000000.0  \n",
+       "52865      2012-08        2012-Q3         2012          3800000.0  \n",
+       "52866      2013-04        2013-Q2         2013         10300000.0  \n",
+       "52867      2010-08        2010-Q3         2010           350000.0  \n",
+       "52868      2013-07        2013-Q3         2013          8400000.0  \n",
+       "52869      2012-12        2012-Q4         2012          3000000.0  \n",
+       "\n",
+       "[2870 rows x 17 columns]"
+      ]
+     },
+     "execution_count": 86,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chunk"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading Chunks Into SQLite"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sqlite3\n",
+    "conn = sqlite3.connect('crunchbase.db')\n",
+    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1')\n",
+    "\n",
+    "for chunk in chunk_iter:\n",
+    "    chunk.to_sql(\"investments\", conn, if_exists='append', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 3566 - 0
Mission240Solutions.ipynb

@@ -0,0 +1,3566 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Introduction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 409,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "pd.options.display.max_columns = 999\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.model_selection import KFold\n",
+    "\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "from sklearn import linear_model\n",
+    "from sklearn.model_selection import KFold"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 333,
+   "metadata": {
+    "collapsed": true,
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"AmesHousing.tsv\", delimiter=\"\\t\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 334,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "57088.251612639091"
+      ]
+     },
+     "execution_count": 334,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def transform_features(df):\n",
+    "    return df\n",
+    "\n",
+    "def select_features(df):\n",
+    "    return df[[\"Gr Liv Area\", \"SalePrice\"]]\n",
+    "\n",
+    "def train_and_test(df):  \n",
+    "    train = df[:1460]\n",
+    "    test = df[1460:]\n",
+    "    \n",
+    "    ## You can use `pd.DataFrame.select_dtypes()` to specify column types\n",
+    "    ## and return only those columns as a data frame.\n",
+    "    numeric_train = train.select_dtypes(include=['integer', 'float'])\n",
+    "    numeric_test = test.select_dtypes(include=['integer', 'float'])\n",
+    "    \n",
+    "    ## `numeric_train.columns` is an Index; use `pd.Index.drop()` to drop a label.\n",
+    "    features = numeric_train.columns.drop(\"SalePrice\")\n",
+    "    lr = linear_model.LinearRegression()\n",
+    "    lr.fit(train[features], train[\"SalePrice\"])\n",
+    "    predictions = lr.predict(test[features])\n",
+    "    mse = mean_squared_error(test[\"SalePrice\"], predictions)\n",
+    "    rmse = np.sqrt(mse)\n",
+    "    \n",
+    "    return rmse\n",
+    "\n",
+    "transform_df = transform_features(df)\n",
+    "filtered_df = select_features(transform_df)\n",
+    "rmse = train_and_test(filtered_df)\n",
+    "\n",
+    "rmse"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Feature Engineering\n",
+    "\n",
+    "- Handle missing values:\n",
+    "    - All columns:\n",
+    "        - Drop any with 5% or more missing values **for now**.\n",
+    "    - Text columns:\n",
+    "        - Drop any with 1 or more missing values **for now**.\n",
+    "    - Numerical columns:\n",
+    "        - For columns with missing values, fill in with the most common value in that column"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "1: All columns: Drop any with 5% or more missing values **for now**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 296,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "## Series object: column name -> number of missing values\n",
+    "num_missing = df.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 297,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Filter Series to columns containing >5% missing values\n",
+    "drop_missing_cols = num_missing[(num_missing > len(df)/20)].sort_values()\n",
+    "\n",
+    "# Drop those columns from the data frame. Note the use of the .index accessor\n",
+    "df = df.drop(drop_missing_cols.index, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "2: Text columns: Drop any with 1 or more missing values **for now**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 298,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "## Series object: column name -> number of missing values\n",
+    "text_mv_counts = df.select_dtypes(include=['object']).isnull().sum().sort_values(ascending=False)\n",
+    "\n",
+    "## Filter Series to columns containing *any* missing values\n",
+    "drop_missing_cols_2 = text_mv_counts[text_mv_counts > 0]\n",
+    "\n",
+    "df = df.drop(drop_missing_cols_2.index, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "3: Numerical columns: For columns with missing values, fill in with the most common value in that column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 299,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BsmtFin SF 1       1\n",
+       "BsmtFin SF 2       1\n",
+       "Bsmt Unf SF        1\n",
+       "Total Bsmt SF      1\n",
+       "Garage Cars        1\n",
+       "Garage Area        1\n",
+       "Bsmt Full Bath     2\n",
+       "Bsmt Half Bath     2\n",
+       "Mas Vnr Area      23\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 299,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "## Compute column-wise missing value counts\n",
+    "num_missing = df.select_dtypes(include=['int', 'float']).isnull().sum()\n",
+    "fixable_numeric_cols = num_missing[(num_missing < len(df)/20) & (num_missing > 0)].sort_values()\n",
+    "fixable_numeric_cols"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 307,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'Bsmt Full Bath': 0.0,\n",
+       " 'Bsmt Half Bath': 0.0,\n",
+       " 'Bsmt Unf SF': 0.0,\n",
+       " 'BsmtFin SF 1': 0.0,\n",
+       " 'BsmtFin SF 2': 0.0,\n",
+       " 'Garage Area': 0.0,\n",
+       " 'Garage Cars': 2.0,\n",
+       " 'Mas Vnr Area': 0.0,\n",
+       " 'Total Bsmt SF': 0.0}"
+      ]
+     },
+     "execution_count": 307,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "## Compute the most common value for each column in `fixable_numeric_cols`.\n",
+    "replacement_values_dict = df[fixable_numeric_cols.index].mode().to_dict(orient='records')[0]\n",
+    "replacement_values_dict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 308,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Use `pd.DataFrame.fillna()` to replace missing values.\n",
+    "df = df.fillna(replacement_values_dict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 311,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    64\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 311,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "## Verify that every column has 0 missing values\n",
+    "df.isnull().sum().value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "What new features can we create that better capture the information in some of the existing features?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 320,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2180   -1\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 320,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "years_sold = df['Yr Sold'] - df['Year Built']\n",
+    "years_sold[years_sold < 0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 322,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1702   -1\n",
+       "2180   -2\n",
+       "2181   -1\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 322,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "years_since_remod = df['Yr Sold'] - df['Year Remod/Add']\n",
+    "years_since_remod[years_since_remod < 0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 329,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "## Create new columns\n",
+    "df['Years Before Sale'] = years_sold\n",
+    "df['Years Since Remod'] = years_since_remod\n",
+    "\n",
+    "## Drop rows with negative values for either of these new features\n",
+    "df = df.drop([1702, 2180, 2181], axis=0)\n",
+    "\n",
+    "## No longer need original year columns\n",
+    "df = df.drop([\"Year Built\", \"Year Remod/Add\"], axis = 1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Drop columns that:\n",
+    "- aren't useful for ML\n",
+    "- leak data about the final sale, read more about columns [here](https://ww2.amstat.org/publications/jse/v19n3/decock/DataDocumentation.txt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 327,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Drop columns that aren't useful for ML\n",
+    "df = df.drop([\"PID\", \"Order\"], axis=1)\n",
+    "\n",
+    "## Drop columns that leak info about the final sale\n",
+    "df = df.drop([\"Mo Sold\", \"Sale Condition\", \"Sale Type\", \"Yr Sold\"], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's update transform_features()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 340,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "55275.367312413066"
+      ]
+     },
+     "execution_count": 340,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def transform_features(df):\n",
+    "    num_missing = df.isnull().sum()\n",
+    "    drop_missing_cols = num_missing[(num_missing > len(df)/20)].sort_values()\n",
+    "    df = df.drop(drop_missing_cols.index, axis=1)\n",
+    "    \n",
+    "    text_mv_counts = df.select_dtypes(include=['object']).isnull().sum().sort_values(ascending=False)\n",
+    "    drop_missing_cols_2 = text_mv_counts[text_mv_counts > 0]\n",
+    "    df = df.drop(drop_missing_cols_2.index, axis=1)\n",
+    "    \n",
+    "    num_missing = df.select_dtypes(include=['int', 'float']).isnull().sum()\n",
+    "    fixable_numeric_cols = num_missing[(num_missing < len(df)/20) & (num_missing > 0)].sort_values()\n",
+    "    replacement_values_dict = df[fixable_numeric_cols.index].mode().to_dict(orient='records')[0]\n",
+    "    df = df.fillna(replacement_values_dict)\n",
+    "    \n",
+    "    years_sold = df['Yr Sold'] - df['Year Built']\n",
+    "    years_since_remod = df['Yr Sold'] - df['Year Remod/Add']\n",
+    "    df['Years Before Sale'] = years_sold\n",
+    "    df['Years Since Remod'] = years_since_remod\n",
+    "    df = df.drop([1702, 2180, 2181], axis=0)\n",
+    "\n",
+    "    df = df.drop([\"PID\", \"Order\", \"Mo Sold\", \"Sale Condition\", \"Sale Type\", \"Year Built\", \"Year Remod/Add\"], axis=1)\n",
+    "    return df\n",
+    "\n",
+    "def select_features(df):\n",
+    "    return df[[\"Gr Liv Area\", \"SalePrice\"]]\n",
+    "\n",
+    "def train_and_test(df):  \n",
+    "    train = df[:1460]\n",
+    "    test = df[1460:]\n",
+    "    \n",
+    "    ## You can use `pd.DataFrame.select_dtypes()` to specify column types\n",
+    "    ## and return only those columns as a data frame.\n",
+    "    numeric_train = train.select_dtypes(include=['integer', 'float'])\n",
+    "    numeric_test = test.select_dtypes(include=['integer', 'float'])\n",
+    "    \n",
+    "    ## `numeric_train.columns` is an Index; use `pd.Index.drop()` to drop a label.\n",
+    "    features = numeric_train.columns.drop(\"SalePrice\")\n",
+    "    lr = linear_model.LinearRegression()\n",
+    "    lr.fit(train[features], train[\"SalePrice\"])\n",
+    "    predictions = lr.predict(test[features])\n",
+    "    mse = mean_squared_error(test[\"SalePrice\"], predictions)\n",
+    "    rmse = np.sqrt(mse)\n",
+    "    \n",
+    "    return rmse\n",
+    "\n",
+    "df = pd.read_csv(\"AmesHousing.tsv\", delimiter=\"\\t\")\n",
+    "transform_df = transform_features(df)\n",
+    "filtered_df = select_features(transform_df)\n",
+    "rmse = train_and_test(filtered_df)\n",
+    "\n",
+    "rmse"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": true
+   },
+   "source": [
+    "## Feature Selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 389,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MS SubClass</th>\n",
+       "      <th>Lot Area</th>\n",
+       "      <th>Overall Qual</th>\n",
+       "      <th>Overall Cond</th>\n",
+       "      <th>Mas Vnr Area</th>\n",
+       "      <th>BsmtFin SF 1</th>\n",
+       "      <th>BsmtFin SF 2</th>\n",
+       "      <th>Bsmt Unf SF</th>\n",
+       "      <th>Total Bsmt SF</th>\n",
+       "      <th>1st Flr SF</th>\n",
+       "      <th>2nd Flr SF</th>\n",
+       "      <th>Low Qual Fin SF</th>\n",
+       "      <th>Gr Liv Area</th>\n",
+       "      <th>Bsmt Full Bath</th>\n",
+       "      <th>Bsmt Half Bath</th>\n",
+       "      <th>Full Bath</th>\n",
+       "      <th>Half Bath</th>\n",
+       "      <th>Bedroom AbvGr</th>\n",
+       "      <th>Kitchen AbvGr</th>\n",
+       "      <th>TotRms AbvGrd</th>\n",
+       "      <th>Fireplaces</th>\n",
+       "      <th>Garage Cars</th>\n",
+       "      <th>Garage Area</th>\n",
+       "      <th>Wood Deck SF</th>\n",
+       "      <th>Open Porch SF</th>\n",
+       "      <th>Enclosed Porch</th>\n",
+       "      <th>3Ssn Porch</th>\n",
+       "      <th>Screen Porch</th>\n",
+       "      <th>Pool Area</th>\n",
+       "      <th>Misc Val</th>\n",
+       "      <th>Yr Sold</th>\n",
+       "      <th>SalePrice</th>\n",
+       "      <th>Years Before Sale</th>\n",
+       "      <th>Years Since Remod</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>20</td>\n",
+       "      <td>31770</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>112.0</td>\n",
+       "      <td>639.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>441.0</td>\n",
+       "      <td>1080.0</td>\n",
+       "      <td>1656</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1656</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>528.0</td>\n",
+       "      <td>210</td>\n",
+       "      <td>62</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>215000</td>\n",
+       "      <td>50</td>\n",
+       "      <td>50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>20</td>\n",
+       "      <td>11622</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>468.0</td>\n",
+       "      <td>144.0</td>\n",
+       "      <td>270.0</td>\n",
+       "      <td>882.0</td>\n",
+       "      <td>896</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>896</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>730.0</td>\n",
+       "      <td>140</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>120</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>105000</td>\n",
+       "      <td>49</td>\n",
+       "      <td>49</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>20</td>\n",
+       "      <td>14267</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>108.0</td>\n",
+       "      <td>923.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>406.0</td>\n",
+       "      <td>1329.0</td>\n",
+       "      <td>1329</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1329</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>312.0</td>\n",
+       "      <td>393</td>\n",
+       "      <td>36</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>12500</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>172000</td>\n",
+       "      <td>52</td>\n",
+       "      <td>52</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>20</td>\n",
+       "      <td>11160</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1065.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1045.0</td>\n",
+       "      <td>2110.0</td>\n",
+       "      <td>2110</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2110</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>522.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>244000</td>\n",
+       "      <td>42</td>\n",
+       "      <td>42</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>60</td>\n",
+       "      <td>13830</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>791.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>137.0</td>\n",
+       "      <td>928.0</td>\n",
+       "      <td>928</td>\n",
+       "      <td>701</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1629</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>482.0</td>\n",
+       "      <td>212</td>\n",
+       "      <td>34</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>189900</td>\n",
+       "      <td>13</td>\n",
+       "      <td>12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>60</td>\n",
+       "      <td>9978</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>602.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>324.0</td>\n",
+       "      <td>926.0</td>\n",
+       "      <td>926</td>\n",
+       "      <td>678</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1604</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>470.0</td>\n",
+       "      <td>360</td>\n",
+       "      <td>36</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>195500</td>\n",
+       "      <td>12</td>\n",
+       "      <td>12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>120</td>\n",
+       "      <td>4920</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>616.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>722.0</td>\n",
+       "      <td>1338.0</td>\n",
+       "      <td>1338</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1338</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>582.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>170</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>213500</td>\n",
+       "      <td>9</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>120</td>\n",
+       "      <td>5005</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>263.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1017.0</td>\n",
+       "      <td>1280.0</td>\n",
+       "      <td>1280</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1280</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>506.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>82</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>144</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>191500</td>\n",
+       "      <td>18</td>\n",
+       "      <td>18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>120</td>\n",
+       "      <td>5389</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1180.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>415.0</td>\n",
+       "      <td>1595.0</td>\n",
+       "      <td>1616</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1616</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>608.0</td>\n",
+       "      <td>237</td>\n",
+       "      <td>152</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>236500</td>\n",
+       "      <td>15</td>\n",
+       "      <td>14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>60</td>\n",
+       "      <td>7500</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>994.0</td>\n",
+       "      <td>994.0</td>\n",
+       "      <td>1028</td>\n",
+       "      <td>776</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1804</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>442.0</td>\n",
+       "      <td>140</td>\n",
+       "      <td>60</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>189000</td>\n",
+       "      <td>11</td>\n",
+       "      <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>60</td>\n",
+       "      <td>10000</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>763.0</td>\n",
+       "      <td>763.0</td>\n",
+       "      <td>763</td>\n",
+       "      <td>892</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1655</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>440.0</td>\n",
+       "      <td>157</td>\n",
+       "      <td>84</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>175900</td>\n",
+       "      <td>17</td>\n",
+       "      <td>16</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>20</td>\n",
+       "      <td>7980</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>935.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>233.0</td>\n",
+       "      <td>1168.0</td>\n",
+       "      <td>1187</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1187</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>420.0</td>\n",
+       "      <td>483</td>\n",
+       "      <td>21</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>500</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>185000</td>\n",
+       "      <td>18</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>60</td>\n",
+       "      <td>8402</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>789.0</td>\n",
+       "      <td>789.0</td>\n",
+       "      <td>789</td>\n",
+       "      <td>676</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1465</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>393.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>75</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>180400</td>\n",
+       "      <td>12</td>\n",
+       "      <td>12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>20</td>\n",
+       "      <td>10176</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>637.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>663.0</td>\n",
+       "      <td>1300.0</td>\n",
+       "      <td>1341</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1341</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>506.0</td>\n",
+       "      <td>192</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>171500</td>\n",
+       "      <td>20</td>\n",
+       "      <td>20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>120</td>\n",
+       "      <td>6820</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>368.0</td>\n",
+       "      <td>1120.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1488.0</td>\n",
+       "      <td>1502</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1502</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>528.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>54</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>140</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>212000</td>\n",
+       "      <td>25</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>60</td>\n",
+       "      <td>53504</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>603.0</td>\n",
+       "      <td>1416.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>234.0</td>\n",
+       "      <td>1650.0</td>\n",
+       "      <td>1690</td>\n",
+       "      <td>1589</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3279</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>12</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>841.0</td>\n",
+       "      <td>503</td>\n",
+       "      <td>36</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>210</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>538000</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>50</td>\n",
+       "      <td>12134</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>427.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>132.0</td>\n",
+       "      <td>559.0</td>\n",
+       "      <td>1080</td>\n",
+       "      <td>672</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1752</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>492.0</td>\n",
+       "      <td>325</td>\n",
+       "      <td>12</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>164000</td>\n",
+       "      <td>22</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>20</td>\n",
+       "      <td>11394</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>350.0</td>\n",
+       "      <td>1445.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>411.0</td>\n",
+       "      <td>1856.0</td>\n",
+       "      <td>1856</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1856</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>834.0</td>\n",
+       "      <td>113</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>394432</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>20</td>\n",
+       "      <td>19138</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>120.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>744.0</td>\n",
+       "      <td>864.0</td>\n",
+       "      <td>864</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>864</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>400.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>141000</td>\n",
+       "      <td>59</td>\n",
+       "      <td>59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>20</td>\n",
+       "      <td>13175</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>119.0</td>\n",
+       "      <td>790.0</td>\n",
+       "      <td>163.0</td>\n",
+       "      <td>589.0</td>\n",
+       "      <td>1542.0</td>\n",
+       "      <td>2073</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2073</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>500.0</td>\n",
+       "      <td>349</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>210000</td>\n",
+       "      <td>32</td>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>20</td>\n",
+       "      <td>11751</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>480.0</td>\n",
+       "      <td>705.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1139.0</td>\n",
+       "      <td>1844.0</td>\n",
+       "      <td>1844</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1844</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>546.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>122</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>190000</td>\n",
+       "      <td>33</td>\n",
+       "      <td>33</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>85</td>\n",
+       "      <td>10625</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>885.0</td>\n",
+       "      <td>168.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1053.0</td>\n",
+       "      <td>1173</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1173</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>528.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>120</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>170000</td>\n",
+       "      <td>36</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>60</td>\n",
+       "      <td>7500</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>533.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>281.0</td>\n",
+       "      <td>814.0</td>\n",
+       "      <td>814</td>\n",
+       "      <td>860</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1674</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>663.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>96</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>216000</td>\n",
+       "      <td>10</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>20</td>\n",
+       "      <td>11241</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>180.0</td>\n",
+       "      <td>578.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>426.0</td>\n",
+       "      <td>1004.0</td>\n",
+       "      <td>1004</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1004</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>480.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>700</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>149000</td>\n",
+       "      <td>40</td>\n",
+       "      <td>40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>20</td>\n",
+       "      <td>12537</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>734.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>344.0</td>\n",
+       "      <td>1078.0</td>\n",
+       "      <td>1078</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1078</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>500.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>149900</td>\n",
+       "      <td>39</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>20</td>\n",
+       "      <td>8450</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>775.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>281.0</td>\n",
+       "      <td>1056.0</td>\n",
+       "      <td>1056</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1056</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>304.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>85</td>\n",
+       "      <td>184</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>142000</td>\n",
+       "      <td>42</td>\n",
+       "      <td>42</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>20</td>\n",
+       "      <td>8400</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>804.0</td>\n",
+       "      <td>78.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>882.0</td>\n",
+       "      <td>882</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>882</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>525.0</td>\n",
+       "      <td>240</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>126000</td>\n",
+       "      <td>40</td>\n",
+       "      <td>40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>20</td>\n",
+       "      <td>10500</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>432.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>432.0</td>\n",
+       "      <td>864.0</td>\n",
+       "      <td>864</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>864</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>115000</td>\n",
+       "      <td>39</td>\n",
+       "      <td>39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>120</td>\n",
+       "      <td>5858</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1051.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>354.0</td>\n",
+       "      <td>1405.0</td>\n",
+       "      <td>1337</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1337</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>511.0</td>\n",
+       "      <td>203</td>\n",
+       "      <td>68</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>184000</td>\n",
+       "      <td>11</td>\n",
+       "      <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>160</td>\n",
+       "      <td>1680</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>504.0</td>\n",
+       "      <td>156.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>327.0</td>\n",
+       "      <td>483.0</td>\n",
+       "      <td>483</td>\n",
+       "      <td>504</td>\n",
+       "      <td>0</td>\n",
+       "      <td>987</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>264.0</td>\n",
+       "      <td>275</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>96000</td>\n",
+       "      <td>39</td>\n",
+       "      <td>39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2900</th>\n",
+       "      <td>20</td>\n",
+       "      <td>13618</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>198.0</td>\n",
+       "      <td>1350.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>378.0</td>\n",
+       "      <td>1728.0</td>\n",
+       "      <td>1960</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1960</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>714.0</td>\n",
+       "      <td>172</td>\n",
+       "      <td>38</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>320000</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2901</th>\n",
+       "      <td>20</td>\n",
+       "      <td>11443</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>208.0</td>\n",
+       "      <td>1460.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>408.0</td>\n",
+       "      <td>1868.0</td>\n",
+       "      <td>2028</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2028</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>880.0</td>\n",
+       "      <td>326</td>\n",
+       "      <td>66</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>369900</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2902</th>\n",
+       "      <td>20</td>\n",
+       "      <td>11577</td>\n",
+       "      <td>9</td>\n",
+       "      <td>5</td>\n",
+       "      <td>382.0</td>\n",
+       "      <td>1455.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>383.0</td>\n",
+       "      <td>1838.0</td>\n",
+       "      <td>1838</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1838</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>682.0</td>\n",
+       "      <td>161</td>\n",
+       "      <td>225</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>359900</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2903</th>\n",
+       "      <td>20</td>\n",
+       "      <td>31250</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1600</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1600</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>270.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>135</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>81500</td>\n",
+       "      <td>55</td>\n",
+       "      <td>55</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2904</th>\n",
+       "      <td>90</td>\n",
+       "      <td>7020</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>200.0</td>\n",
+       "      <td>1243.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>45.0</td>\n",
+       "      <td>1288.0</td>\n",
+       "      <td>1368</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1368</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>784.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>48</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>215000</td>\n",
+       "      <td>9</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2905</th>\n",
+       "      <td>120</td>\n",
+       "      <td>4500</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>116.0</td>\n",
+       "      <td>897.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>319.0</td>\n",
+       "      <td>1216.0</td>\n",
+       "      <td>1216</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1216</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>402.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>125</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>164000</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2906</th>\n",
+       "      <td>120</td>\n",
+       "      <td>4500</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>443.0</td>\n",
+       "      <td>1201.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>1237.0</td>\n",
+       "      <td>1337</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1337</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>405.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>199</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>153500</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2907</th>\n",
+       "      <td>20</td>\n",
+       "      <td>17217</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1140.0</td>\n",
+       "      <td>1140.0</td>\n",
+       "      <td>1140</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1140</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>36</td>\n",
+       "      <td>56</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>84500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2908</th>\n",
+       "      <td>160</td>\n",
+       "      <td>2665</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>264.0</td>\n",
+       "      <td>264.0</td>\n",
+       "      <td>616</td>\n",
+       "      <td>688</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1304</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>336.0</td>\n",
+       "      <td>141</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>104500</td>\n",
+       "      <td>29</td>\n",
+       "      <td>29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2909</th>\n",
+       "      <td>160</td>\n",
+       "      <td>2665</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>548.0</td>\n",
+       "      <td>173.0</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>757.0</td>\n",
+       "      <td>925</td>\n",
+       "      <td>550</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1475</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>336.0</td>\n",
+       "      <td>104</td>\n",
+       "      <td>26</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>127000</td>\n",
+       "      <td>29</td>\n",
+       "      <td>29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2910</th>\n",
+       "      <td>160</td>\n",
+       "      <td>3964</td>\n",
+       "      <td>6</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>837.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>105.0</td>\n",
+       "      <td>942.0</td>\n",
+       "      <td>1291</td>\n",
+       "      <td>1230</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2521</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>576.0</td>\n",
+       "      <td>728</td>\n",
+       "      <td>20</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>151400</td>\n",
+       "      <td>33</td>\n",
+       "      <td>33</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2911</th>\n",
+       "      <td>20</td>\n",
+       "      <td>10172</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>441.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>423.0</td>\n",
+       "      <td>864.0</td>\n",
+       "      <td>874</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>874</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>288.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>120</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>126500</td>\n",
+       "      <td>38</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2912</th>\n",
+       "      <td>90</td>\n",
+       "      <td>11836</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>149.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1503.0</td>\n",
+       "      <td>1652.0</td>\n",
+       "      <td>1652</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1652</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>928.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>146500</td>\n",
+       "      <td>36</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2913</th>\n",
+       "      <td>180</td>\n",
+       "      <td>1470</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>522.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>108.0</td>\n",
+       "      <td>630.0</td>\n",
+       "      <td>630</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>630</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>73000</td>\n",
+       "      <td>36</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2914</th>\n",
+       "      <td>160</td>\n",
+       "      <td>1484</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>252.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>294.0</td>\n",
+       "      <td>546.0</td>\n",
+       "      <td>546</td>\n",
+       "      <td>546</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1092</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>253.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>79400</td>\n",
+       "      <td>34</td>\n",
+       "      <td>34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2915</th>\n",
+       "      <td>20</td>\n",
+       "      <td>13384</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>194.0</td>\n",
+       "      <td>119.0</td>\n",
+       "      <td>344.0</td>\n",
+       "      <td>641.0</td>\n",
+       "      <td>1104.0</td>\n",
+       "      <td>1360</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1360</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>336.0</td>\n",
+       "      <td>160</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>140000</td>\n",
+       "      <td>37</td>\n",
+       "      <td>27</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2916</th>\n",
+       "      <td>180</td>\n",
+       "      <td>1533</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>553.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>77.0</td>\n",
+       "      <td>630.0</td>\n",
+       "      <td>630</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>630</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>92000</td>\n",
+       "      <td>36</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2917</th>\n",
+       "      <td>160</td>\n",
+       "      <td>1533</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>408.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>138.0</td>\n",
+       "      <td>546.0</td>\n",
+       "      <td>546</td>\n",
+       "      <td>546</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1092</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>286.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>87550</td>\n",
+       "      <td>36</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2918</th>\n",
+       "      <td>160</td>\n",
+       "      <td>1526</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>546.0</td>\n",
+       "      <td>546.0</td>\n",
+       "      <td>546</td>\n",
+       "      <td>546</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1092</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>34</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>79500</td>\n",
+       "      <td>36</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2919</th>\n",
+       "      <td>160</td>\n",
+       "      <td>1936</td>\n",
+       "      <td>4</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>546.0</td>\n",
+       "      <td>546.0</td>\n",
+       "      <td>546</td>\n",
+       "      <td>546</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1092</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>90500</td>\n",
+       "      <td>36</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2920</th>\n",
+       "      <td>160</td>\n",
+       "      <td>1894</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>252.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>294.0</td>\n",
+       "      <td>546.0</td>\n",
+       "      <td>546</td>\n",
+       "      <td>546</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1092</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>286.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>24</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>71000</td>\n",
+       "      <td>36</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2921</th>\n",
+       "      <td>90</td>\n",
+       "      <td>12640</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>936.0</td>\n",
+       "      <td>396.0</td>\n",
+       "      <td>396.0</td>\n",
+       "      <td>1728.0</td>\n",
+       "      <td>1728</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1728</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>574.0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>150900</td>\n",
+       "      <td>30</td>\n",
+       "      <td>30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2922</th>\n",
+       "      <td>90</td>\n",
+       "      <td>9297</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1606.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>122.0</td>\n",
+       "      <td>1728.0</td>\n",
+       "      <td>1728</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1728</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>560.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>188000</td>\n",
+       "      <td>30</td>\n",
+       "      <td>30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2923</th>\n",
+       "      <td>20</td>\n",
+       "      <td>17400</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>936.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>1126.0</td>\n",
+       "      <td>1126</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1126</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>484.0</td>\n",
+       "      <td>295</td>\n",
+       "      <td>41</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>160000</td>\n",
+       "      <td>29</td>\n",
+       "      <td>29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2924</th>\n",
+       "      <td>20</td>\n",
+       "      <td>20000</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1224.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1224.0</td>\n",
+       "      <td>1224</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1224</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>576.0</td>\n",
+       "      <td>474</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>131000</td>\n",
+       "      <td>46</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2925</th>\n",
+       "      <td>80</td>\n",
+       "      <td>7937</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>819.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>184.0</td>\n",
+       "      <td>1003.0</td>\n",
+       "      <td>1003</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1003</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>588.0</td>\n",
+       "      <td>120</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>142500</td>\n",
+       "      <td>22</td>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2926</th>\n",
+       "      <td>20</td>\n",
+       "      <td>8885</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>301.0</td>\n",
+       "      <td>324.0</td>\n",
+       "      <td>239.0</td>\n",
+       "      <td>864.0</td>\n",
+       "      <td>902</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>902</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>484.0</td>\n",
+       "      <td>164</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>131000</td>\n",
+       "      <td>23</td>\n",
+       "      <td>23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2927</th>\n",
+       "      <td>85</td>\n",
+       "      <td>10441</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>337.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>575.0</td>\n",
+       "      <td>912.0</td>\n",
+       "      <td>970</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>970</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>80</td>\n",
+       "      <td>32</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>700</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>132000</td>\n",
+       "      <td>14</td>\n",
+       "      <td>14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2928</th>\n",
+       "      <td>20</td>\n",
+       "      <td>10010</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1071.0</td>\n",
+       "      <td>123.0</td>\n",
+       "      <td>195.0</td>\n",
+       "      <td>1389.0</td>\n",
+       "      <td>1389</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1389</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>418.0</td>\n",
+       "      <td>240</td>\n",
+       "      <td>38</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>170000</td>\n",
+       "      <td>32</td>\n",
+       "      <td>31</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2929</th>\n",
+       "      <td>60</td>\n",
+       "      <td>9627</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>758.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>238.0</td>\n",
+       "      <td>996.0</td>\n",
+       "      <td>996</td>\n",
+       "      <td>1004</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>650.0</td>\n",
+       "      <td>190</td>\n",
+       "      <td>48</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>188000</td>\n",
+       "      <td>13</td>\n",
+       "      <td>12</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2927 rows × 34 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      MS SubClass  Lot Area  Overall Qual  Overall Cond  Mas Vnr Area  \\\n",
+       "0              20     31770             6             5         112.0   \n",
+       "1              20     11622             5             6           0.0   \n",
+       "2              20     14267             6             6         108.0   \n",
+       "3              20     11160             7             5           0.0   \n",
+       "4              60     13830             5             5           0.0   \n",
+       "5              60      9978             6             6          20.0   \n",
+       "6             120      4920             8             5           0.0   \n",
+       "7             120      5005             8             5           0.0   \n",
+       "8             120      5389             8             5           0.0   \n",
+       "9              60      7500             7             5           0.0   \n",
+       "10             60     10000             6             5           0.0   \n",
+       "11             20      7980             6             7           0.0   \n",
+       "12             60      8402             6             5           0.0   \n",
+       "13             20     10176             7             5           0.0   \n",
+       "14            120      6820             8             5           0.0   \n",
+       "15             60     53504             8             5         603.0   \n",
+       "16             50     12134             8             7           0.0   \n",
+       "17             20     11394             9             2         350.0   \n",
+       "18             20     19138             4             5           0.0   \n",
+       "19             20     13175             6             6         119.0   \n",
+       "20             20     11751             6             6         480.0   \n",
+       "21             85     10625             7             6          81.0   \n",
+       "22             60      7500             7             5           0.0   \n",
+       "23             20     11241             6             7         180.0   \n",
+       "24             20     12537             5             6           0.0   \n",
+       "25             20      8450             5             6           0.0   \n",
+       "26             20      8400             4             5           0.0   \n",
+       "27             20     10500             4             5           0.0   \n",
+       "28            120      5858             7             5           0.0   \n",
+       "29            160      1680             6             5         504.0   \n",
+       "...           ...       ...           ...           ...           ...   \n",
+       "2900           20     13618             8             5         198.0   \n",
+       "2901           20     11443             8             5         208.0   \n",
+       "2902           20     11577             9             5         382.0   \n",
+       "2903           20     31250             1             3           0.0   \n",
+       "2904           90      7020             7             5         200.0   \n",
+       "2905          120      4500             6             5         116.0   \n",
+       "2906          120      4500             6             5         443.0   \n",
+       "2907           20     17217             5             5           0.0   \n",
+       "2908          160      2665             5             6           0.0   \n",
+       "2909          160      2665             5             6           0.0   \n",
+       "2910          160      3964             6             4           0.0   \n",
+       "2911           20     10172             5             7           0.0   \n",
+       "2912           90     11836             5             5           0.0   \n",
+       "2913          180      1470             4             6           0.0   \n",
+       "2914          160      1484             4             4           0.0   \n",
+       "2915           20     13384             5             5         194.0   \n",
+       "2916          180      1533             5             7           0.0   \n",
+       "2917          160      1533             4             5           0.0   \n",
+       "2918          160      1526             4             5           0.0   \n",
+       "2919          160      1936             4             7           0.0   \n",
+       "2920          160      1894             4             5           0.0   \n",
+       "2921           90     12640             6             5           0.0   \n",
+       "2922           90      9297             5             5           0.0   \n",
+       "2923           20     17400             5             5           0.0   \n",
+       "2924           20     20000             5             7           0.0   \n",
+       "2925           80      7937             6             6           0.0   \n",
+       "2926           20      8885             5             5           0.0   \n",
+       "2927           85     10441             5             5           0.0   \n",
+       "2928           20     10010             5             5           0.0   \n",
+       "2929           60      9627             7             5          94.0   \n",
+       "\n",
+       "      BsmtFin SF 1  BsmtFin SF 2  Bsmt Unf SF  Total Bsmt SF  1st Flr SF  \\\n",
+       "0            639.0           0.0        441.0         1080.0        1656   \n",
+       "1            468.0         144.0        270.0          882.0         896   \n",
+       "2            923.0           0.0        406.0         1329.0        1329   \n",
+       "3           1065.0           0.0       1045.0         2110.0        2110   \n",
+       "4            791.0           0.0        137.0          928.0         928   \n",
+       "5            602.0           0.0        324.0          926.0         926   \n",
+       "6            616.0           0.0        722.0         1338.0        1338   \n",
+       "7            263.0           0.0       1017.0         1280.0        1280   \n",
+       "8           1180.0           0.0        415.0         1595.0        1616   \n",
+       "9              0.0           0.0        994.0          994.0        1028   \n",
+       "10             0.0           0.0        763.0          763.0         763   \n",
+       "11           935.0           0.0        233.0         1168.0        1187   \n",
+       "12             0.0           0.0        789.0          789.0         789   \n",
+       "13           637.0           0.0        663.0         1300.0        1341   \n",
+       "14           368.0        1120.0          0.0         1488.0        1502   \n",
+       "15          1416.0           0.0        234.0         1650.0        1690   \n",
+       "16           427.0           0.0        132.0          559.0        1080   \n",
+       "17          1445.0           0.0        411.0         1856.0        1856   \n",
+       "18           120.0           0.0        744.0          864.0         864   \n",
+       "19           790.0         163.0        589.0         1542.0        2073   \n",
+       "20           705.0           0.0       1139.0         1844.0        1844   \n",
+       "21           885.0         168.0          0.0         1053.0        1173   \n",
+       "22           533.0           0.0        281.0          814.0         814   \n",
+       "23           578.0           0.0        426.0         1004.0        1004   \n",
+       "24           734.0           0.0        344.0         1078.0        1078   \n",
+       "25           775.0           0.0        281.0         1056.0        1056   \n",
+       "26           804.0          78.0          0.0          882.0         882   \n",
+       "27           432.0           0.0        432.0          864.0         864   \n",
+       "28          1051.0           0.0        354.0         1405.0        1337   \n",
+       "29           156.0           0.0        327.0          483.0         483   \n",
+       "...            ...           ...          ...            ...         ...   \n",
+       "2900        1350.0           0.0        378.0         1728.0        1960   \n",
+       "2901        1460.0           0.0        408.0         1868.0        2028   \n",
+       "2902        1455.0           0.0        383.0         1838.0        1838   \n",
+       "2903           0.0           0.0          0.0            0.0        1600   \n",
+       "2904        1243.0           0.0         45.0         1288.0        1368   \n",
+       "2905         897.0           0.0        319.0         1216.0        1216   \n",
+       "2906        1201.0           0.0         36.0         1237.0        1337   \n",
+       "2907           0.0           0.0       1140.0         1140.0        1140   \n",
+       "2908           0.0           0.0        264.0          264.0         616   \n",
+       "2909         548.0         173.0         36.0          757.0         925   \n",
+       "2910         837.0           0.0        105.0          942.0        1291   \n",
+       "2911         441.0           0.0        423.0          864.0         874   \n",
+       "2912         149.0           0.0       1503.0         1652.0        1652   \n",
+       "2913         522.0           0.0        108.0          630.0         630   \n",
+       "2914         252.0           0.0        294.0          546.0         546   \n",
+       "2915         119.0         344.0        641.0         1104.0        1360   \n",
+       "2916         553.0           0.0         77.0          630.0         630   \n",
+       "2917         408.0           0.0        138.0          546.0         546   \n",
+       "2918           0.0           0.0        546.0          546.0         546   \n",
+       "2919           0.0           0.0        546.0          546.0         546   \n",
+       "2920         252.0           0.0        294.0          546.0         546   \n",
+       "2921         936.0         396.0        396.0         1728.0        1728   \n",
+       "2922        1606.0           0.0        122.0         1728.0        1728   \n",
+       "2923         936.0           0.0        190.0         1126.0        1126   \n",
+       "2924        1224.0           0.0          0.0         1224.0        1224   \n",
+       "2925         819.0           0.0        184.0         1003.0        1003   \n",
+       "2926         301.0         324.0        239.0          864.0         902   \n",
+       "2927         337.0           0.0        575.0          912.0         970   \n",
+       "2928        1071.0         123.0        195.0         1389.0        1389   \n",
+       "2929         758.0           0.0        238.0          996.0         996   \n",
+       "\n",
+       "      2nd Flr SF  Low Qual Fin SF  Gr Liv Area  Bsmt Full Bath  \\\n",
+       "0              0                0         1656             1.0   \n",
+       "1              0                0          896             0.0   \n",
+       "2              0                0         1329             0.0   \n",
+       "3              0                0         2110             1.0   \n",
+       "4            701                0         1629             0.0   \n",
+       "5            678                0         1604             0.0   \n",
+       "6              0                0         1338             1.0   \n",
+       "7              0                0         1280             0.0   \n",
+       "8              0                0         1616             1.0   \n",
+       "9            776                0         1804             0.0   \n",
+       "10           892                0         1655             0.0   \n",
+       "11             0                0         1187             1.0   \n",
+       "12           676                0         1465             0.0   \n",
+       "13             0                0         1341             1.0   \n",
+       "14             0                0         1502             1.0   \n",
+       "15          1589                0         3279             1.0   \n",
+       "16           672                0         1752             0.0   \n",
+       "17             0                0         1856             1.0   \n",
+       "18             0                0          864             0.0   \n",
+       "19             0                0         2073             1.0   \n",
+       "20             0                0         1844             0.0   \n",
+       "21             0                0         1173             1.0   \n",
+       "22           860                0         1674             1.0   \n",
+       "23             0                0         1004             1.0   \n",
+       "24             0                0         1078             1.0   \n",
+       "25             0                0         1056             1.0   \n",
+       "26             0                0          882             1.0   \n",
+       "27             0                0          864             0.0   \n",
+       "28             0                0         1337             1.0   \n",
+       "29           504                0          987             0.0   \n",
+       "...          ...              ...          ...             ...   \n",
+       "2900           0                0         1960             1.0   \n",
+       "2901           0                0         2028             1.0   \n",
+       "2902           0                0         1838             1.0   \n",
+       "2903           0                0         1600             0.0   \n",
+       "2904           0                0         1368             2.0   \n",
+       "2905           0                0         1216             1.0   \n",
+       "2906           0                0         1337             1.0   \n",
+       "2907           0                0         1140             0.0   \n",
+       "2908         688                0         1304             0.0   \n",
+       "2909         550                0         1475             0.0   \n",
+       "2910        1230                0         2521             1.0   \n",
+       "2911           0                0          874             1.0   \n",
+       "2912           0                0         1652             0.0   \n",
+       "2913           0                0          630             1.0   \n",
+       "2914         546                0         1092             0.0   \n",
+       "2915           0                0         1360             1.0   \n",
+       "2916           0                0          630             1.0   \n",
+       "2917         546                0         1092             0.0   \n",
+       "2918         546                0         1092             0.0   \n",
+       "2919         546                0         1092             0.0   \n",
+       "2920         546                0         1092             0.0   \n",
+       "2921           0                0         1728             0.0   \n",
+       "2922           0                0         1728             2.0   \n",
+       "2923           0                0         1126             1.0   \n",
+       "2924           0                0         1224             1.0   \n",
+       "2925           0                0         1003             1.0   \n",
+       "2926           0                0          902             1.0   \n",
+       "2927           0                0          970             0.0   \n",
+       "2928           0                0         1389             1.0   \n",
+       "2929        1004                0         2000             0.0   \n",
+       "\n",
+       "      Bsmt Half Bath  Full Bath  Half Bath  Bedroom AbvGr  Kitchen AbvGr  \\\n",
+       "0                0.0          1          0              3              1   \n",
+       "1                0.0          1          0              2              1   \n",
+       "2                0.0          1          1              3              1   \n",
+       "3                0.0          2          1              3              1   \n",
+       "4                0.0          2          1              3              1   \n",
+       "5                0.0          2          1              3              1   \n",
+       "6                0.0          2          0              2              1   \n",
+       "7                0.0          2          0              2              1   \n",
+       "8                0.0          2          0              2              1   \n",
+       "9                0.0          2          1              3              1   \n",
+       "10               0.0          2          1              3              1   \n",
+       "11               0.0          2          0              3              1   \n",
+       "12               0.0          2          1              3              1   \n",
+       "13               0.0          1          1              2              1   \n",
+       "14               0.0          1          1              1              1   \n",
+       "15               0.0          3          1              4              1   \n",
+       "16               0.0          2          0              4              1   \n",
+       "17               0.0          1          1              1              1   \n",
+       "18               0.0          1          0              2              1   \n",
+       "19               0.0          2          0              3              1   \n",
+       "20               0.0          2          0              3              1   \n",
+       "21               0.0          2          0              3              1   \n",
+       "22               0.0          2          1              3              1   \n",
+       "23               0.0          1          0              2              1   \n",
+       "24               0.0          1          1              3              1   \n",
+       "25               0.0          1          0              3              1   \n",
+       "26               0.0          1          0              2              1   \n",
+       "27               0.0          1          0              3              1   \n",
+       "28               0.0          2          0              2              1   \n",
+       "29               0.0          1          1              2              1   \n",
+       "...              ...        ...        ...            ...            ...   \n",
+       "2900             0.0          2          0              3              1   \n",
+       "2901             0.0          2          0              2              1   \n",
+       "2902             0.0          2          0              3              1   \n",
+       "2903             0.0          1          1              3              1   \n",
+       "2904             0.0          2          0              2              2   \n",
+       "2905             0.0          2          0              2              1   \n",
+       "2906             0.0          2          0              2              1   \n",
+       "2907             0.0          1          0              3              1   \n",
+       "2908             0.0          1          1              3              1   \n",
+       "2909             0.0          2          0              4              1   \n",
+       "2910             0.0          2          1              5              1   \n",
+       "2911             0.0          1          0              3              1   \n",
+       "2912             0.0          2          0              4              2   \n",
+       "2913             0.0          1          0              1              1   \n",
+       "2914             0.0          1          1              3              1   \n",
+       "2915             0.0          1          0              3              1   \n",
+       "2916             0.0          1          0              1              1   \n",
+       "2917             0.0          1          1              3              1   \n",
+       "2918             0.0          1          1              3              1   \n",
+       "2919             0.0          1          1              3              1   \n",
+       "2920             0.0          1          1              3              1   \n",
+       "2921             0.0          2          0              4              2   \n",
+       "2922             0.0          2          0              4              2   \n",
+       "2923             0.0          2          0              3              1   \n",
+       "2924             0.0          1          0              4              1   \n",
+       "2925             0.0          1          0              3              1   \n",
+       "2926             0.0          1          0              2              1   \n",
+       "2927             1.0          1          0              3              1   \n",
+       "2928             0.0          1          0              2              1   \n",
+       "2929             0.0          2          1              3              1   \n",
+       "\n",
+       "      TotRms AbvGrd  Fireplaces  Garage Cars  Garage Area  Wood Deck SF  \\\n",
+       "0                 7           2          2.0        528.0           210   \n",
+       "1                 5           0          1.0        730.0           140   \n",
+       "2                 6           0          1.0        312.0           393   \n",
+       "3                 8           2          2.0        522.0             0   \n",
+       "4                 6           1          2.0        482.0           212   \n",
+       "5                 7           1          2.0        470.0           360   \n",
+       "6                 6           0          2.0        582.0             0   \n",
+       "7                 5           0          2.0        506.0             0   \n",
+       "8                 5           1          2.0        608.0           237   \n",
+       "9                 7           1          2.0        442.0           140   \n",
+       "10                7           1          2.0        440.0           157   \n",
+       "11                6           0          2.0        420.0           483   \n",
+       "12                7           1          2.0        393.0             0   \n",
+       "13                5           1          2.0        506.0           192   \n",
+       "14                4           0          2.0        528.0             0   \n",
+       "15               12           1          3.0        841.0           503   \n",
+       "16                8           0          2.0        492.0           325   \n",
+       "17                8           1          3.0        834.0           113   \n",
+       "18                4           0          2.0        400.0             0   \n",
+       "19                7           2          2.0        500.0           349   \n",
+       "20                7           1          2.0        546.0             0   \n",
+       "21                6           2          2.0        528.0             0   \n",
+       "22                7           0          2.0        663.0             0   \n",
+       "23                5           1          2.0        480.0             0   \n",
+       "24                6           1          2.0        500.0             0   \n",
+       "25                6           1          1.0        304.0             0   \n",
+       "26                4           0          2.0        525.0           240   \n",
+       "27                5           1          0.0          0.0             0   \n",
+       "28                5           1          2.0        511.0           203   \n",
+       "29                5           0          1.0        264.0           275   \n",
+       "...             ...         ...          ...          ...           ...   \n",
+       "2900              8           2          3.0        714.0           172   \n",
+       "2901              7           2          3.0        880.0           326   \n",
+       "2902              9           1          3.0        682.0           161   \n",
+       "2903              6           0          1.0        270.0             0   \n",
+       "2904              8           0          4.0        784.0             0   \n",
+       "2905              5           0          2.0        402.0             0   \n",
+       "2906              5           0          2.0        405.0             0   \n",
+       "2907              6           0          0.0          0.0            36   \n",
+       "2908              5           1          1.0        336.0           141   \n",
+       "2909              6           1          1.0        336.0           104   \n",
+       "2910             10           1          2.0        576.0           728   \n",
+       "2911              5           0          1.0        288.0             0   \n",
+       "2912              8           0          3.0        928.0             0   \n",
+       "2913              3           0          0.0          0.0             0   \n",
+       "2914              5           0          1.0        253.0             0   \n",
+       "2915              8           1          1.0        336.0           160   \n",
+       "2916              3           0          0.0          0.0             0   \n",
+       "2917              5           0          1.0        286.0             0   \n",
+       "2918              5           0          0.0          0.0             0   \n",
+       "2919              5           0          0.0          0.0             0   \n",
+       "2920              6           0          1.0        286.0             0   \n",
+       "2921              8           0          2.0        574.0            40   \n",
+       "2922              8           0          2.0        560.0             0   \n",
+       "2923              5           1          2.0        484.0           295   \n",
+       "2924              7           1          2.0        576.0           474   \n",
+       "2925              6           0          2.0        588.0           120   \n",
+       "2926              5           0          2.0        484.0           164   \n",
+       "2927              6           0          0.0          0.0            80   \n",
+       "2928              6           1          2.0        418.0           240   \n",
+       "2929              9           1          3.0        650.0           190   \n",
+       "\n",
+       "      Open Porch SF  Enclosed Porch  3Ssn Porch  Screen Porch  Pool Area  \\\n",
+       "0                62               0           0             0          0   \n",
+       "1                 0               0           0           120          0   \n",
+       "2                36               0           0             0          0   \n",
+       "3                 0               0           0             0          0   \n",
+       "4                34               0           0             0          0   \n",
+       "5                36               0           0             0          0   \n",
+       "6                 0             170           0             0          0   \n",
+       "7                82               0           0           144          0   \n",
+       "8               152               0           0             0          0   \n",
+       "9                60               0           0             0          0   \n",
+       "10               84               0           0             0          0   \n",
+       "11               21               0           0             0          0   \n",
+       "12               75               0           0             0          0   \n",
+       "13                0               0           0             0          0   \n",
+       "14               54               0           0           140          0   \n",
+       "15               36               0           0           210          0   \n",
+       "16               12               0           0             0          0   \n",
+       "17                0               0           0             0          0   \n",
+       "18                0               0           0             0          0   \n",
+       "19                0               0           0             0          0   \n",
+       "20              122               0           0             0          0   \n",
+       "21              120               0           0             0          0   \n",
+       "22               96               0           0             0          0   \n",
+       "23                0               0           0             0          0   \n",
+       "24                0               0           0             0          0   \n",
+       "25               85             184           0             0          0   \n",
+       "26                0               0           0             0          0   \n",
+       "27                0               0           0             0          0   \n",
+       "28               68               0           0             0          0   \n",
+       "29                0               0           0             0          0   \n",
+       "...             ...             ...         ...           ...        ...   \n",
+       "2900             38               0           0             0          0   \n",
+       "2901             66               0           0             0          0   \n",
+       "2902            225               0           0             0          0   \n",
+       "2903              0             135           0             0          0   \n",
+       "2904             48               0           0             0          0   \n",
+       "2905            125               0           0             0          0   \n",
+       "2906            199               0           0             0          0   \n",
+       "2907             56               0           0             0          0   \n",
+       "2908              0               0           0             0          0   \n",
+       "2909             26               0           0             0          0   \n",
+       "2910             20               0           0             0          0   \n",
+       "2911            120               0           0             0          0   \n",
+       "2912              0               0           0             0          0   \n",
+       "2913              0               0           0             0          0   \n",
+       "2914              0               0           0             0          0   \n",
+       "2915              0               0           0             0          0   \n",
+       "2916              0               0           0             0          0   \n",
+       "2917              0               0           0             0          0   \n",
+       "2918             34               0           0             0          0   \n",
+       "2919              0               0           0             0          0   \n",
+       "2920             24               0           0             0          0   \n",
+       "2921              0               0           0             0          0   \n",
+       "2922              0               0           0             0          0   \n",
+       "2923             41               0           0             0          0   \n",
+       "2924              0               0           0             0          0   \n",
+       "2925              0               0           0             0          0   \n",
+       "2926              0               0           0             0          0   \n",
+       "2927             32               0           0             0          0   \n",
+       "2928             38               0           0             0          0   \n",
+       "2929             48               0           0             0          0   \n",
+       "\n",
+       "      Misc Val  Yr Sold  SalePrice  Years Before Sale  Years Since Remod  \n",
+       "0            0     2010     215000                 50                 50  \n",
+       "1            0     2010     105000                 49                 49  \n",
+       "2        12500     2010     172000                 52                 52  \n",
+       "3            0     2010     244000                 42                 42  \n",
+       "4            0     2010     189900                 13                 12  \n",
+       "5            0     2010     195500                 12                 12  \n",
+       "6            0     2010     213500                  9                  9  \n",
+       "7            0     2010     191500                 18                 18  \n",
+       "8            0     2010     236500                 15                 14  \n",
+       "9            0     2010     189000                 11                 11  \n",
+       "10           0     2010     175900                 17                 16  \n",
+       "11         500     2010     185000                 18                  3  \n",
+       "12           0     2010     180400                 12                 12  \n",
+       "13           0     2010     171500                 20                 20  \n",
+       "14           0     2010     212000                 25                 25  \n",
+       "15           0     2010     538000                  7                  7  \n",
+       "16           0     2010     164000                 22                  5  \n",
+       "17           0     2010     394432                  0                  0  \n",
+       "18           0     2010     141000                 59                 59  \n",
+       "19           0     2010     210000                 32                 22  \n",
+       "20           0     2010     190000                 33                 33  \n",
+       "21           0     2010     170000                 36                 36  \n",
+       "22           0     2010     216000                 10                 10  \n",
+       "23         700     2010     149000                 40                 40  \n",
+       "24           0     2010     149900                 39                  2  \n",
+       "25           0     2010     142000                 42                 42  \n",
+       "26           0     2010     126000                 40                 40  \n",
+       "27           0     2010     115000                 39                 39  \n",
+       "28           0     2010     184000                 11                 11  \n",
+       "29           0     2010      96000                 39                 39  \n",
+       "...        ...      ...        ...                ...                ...  \n",
+       "2900         0     2006     320000                  1                  0  \n",
+       "2901         0     2006     369900                  1                  0  \n",
+       "2902         0     2006     359900                  1                  0  \n",
+       "2903         0     2006      81500                 55                 55  \n",
+       "2904         0     2006     215000                  9                  9  \n",
+       "2905         0     2006     164000                  8                  8  \n",
+       "2906         0     2006     153500                  8                  8  \n",
+       "2907         0     2006      84500                  0                  0  \n",
+       "2908         0     2006     104500                 29                 29  \n",
+       "2909         0     2006     127000                 29                 29  \n",
+       "2910         0     2006     151400                 33                 33  \n",
+       "2911         0     2006     126500                 38                  3  \n",
+       "2912         0     2006     146500                 36                 36  \n",
+       "2913         0     2006      73000                 36                 36  \n",
+       "2914         0     2006      79400                 34                 34  \n",
+       "2915         0     2006     140000                 37                 27  \n",
+       "2916         0     2006      92000                 36                 36  \n",
+       "2917         0     2006      87550                 36                 36  \n",
+       "2918         0     2006      79500                 36                 36  \n",
+       "2919         0     2006      90500                 36                 36  \n",
+       "2920         0     2006      71000                 36                 36  \n",
+       "2921         0     2006     150900                 30                 30  \n",
+       "2922         0     2006     188000                 30                 30  \n",
+       "2923         0     2006     160000                 29                 29  \n",
+       "2924         0     2006     131000                 46                 10  \n",
+       "2925         0     2006     142500                 22                 22  \n",
+       "2926         0     2006     131000                 23                 23  \n",
+       "2927       700     2006     132000                 14                 14  \n",
+       "2928         0     2006     170000                 32                 31  \n",
+       "2929         0     2006     188000                 13                 12  \n",
+       "\n",
+       "[2927 rows x 34 columns]"
+      ]
+     },
+     "execution_count": 389,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "numerical_df = transform_df.select_dtypes(include=['int', 'float'])\n",
+    "numerical_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 390,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BsmtFin SF 2         0.006127\n",
+       "Misc Val             0.019273\n",
+       "Yr Sold              0.030358\n",
+       "3Ssn Porch           0.032268\n",
+       "Bsmt Half Bath       0.035875\n",
+       "Low Qual Fin SF      0.037629\n",
+       "Pool Area            0.068438\n",
+       "MS SubClass          0.085128\n",
+       "Overall Cond         0.101540\n",
+       "Screen Porch         0.112280\n",
+       "Kitchen AbvGr        0.119760\n",
+       "Enclosed Porch       0.128685\n",
+       "Bedroom AbvGr        0.143916\n",
+       "Bsmt Unf SF          0.182751\n",
+       "Lot Area             0.267520\n",
+       "2nd Flr SF           0.269601\n",
+       "Bsmt Full Bath       0.276258\n",
+       "Half Bath            0.284871\n",
+       "Open Porch SF        0.316262\n",
+       "Wood Deck SF         0.328183\n",
+       "BsmtFin SF 1         0.439284\n",
+       "Fireplaces           0.474831\n",
+       "TotRms AbvGrd        0.498574\n",
+       "Mas Vnr Area         0.506983\n",
+       "Years Since Remod    0.534985\n",
+       "Full Bath            0.546118\n",
+       "Years Before Sale    0.558979\n",
+       "1st Flr SF           0.635185\n",
+       "Garage Area          0.641425\n",
+       "Total Bsmt SF        0.644012\n",
+       "Garage Cars          0.648361\n",
+       "Gr Liv Area          0.717596\n",
+       "Overall Qual         0.801206\n",
+       "SalePrice            1.000000\n",
+       "Name: SalePrice, dtype: float64"
+      ]
+     },
+     "execution_count": 390,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "abs_corr_coeffs = numerical_df.corr()['SalePrice'].abs().sort_values()\n",
+    "abs_corr_coeffs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 391,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BsmtFin SF 1         0.439284\n",
+       "Fireplaces           0.474831\n",
+       "TotRms AbvGrd        0.498574\n",
+       "Mas Vnr Area         0.506983\n",
+       "Years Since Remod    0.534985\n",
+       "Full Bath            0.546118\n",
+       "Years Before Sale    0.558979\n",
+       "1st Flr SF           0.635185\n",
+       "Garage Area          0.641425\n",
+       "Total Bsmt SF        0.644012\n",
+       "Garage Cars          0.648361\n",
+       "Gr Liv Area          0.717596\n",
+       "Overall Qual         0.801206\n",
+       "SalePrice            1.000000\n",
+       "Name: SalePrice, dtype: float64"
+      ]
+     },
+     "execution_count": 391,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "## Let's only keep columns with a correlation coefficient larger than 0.4 (arbitrary, worth experimenting later!)\n",
+    "abs_corr_coeffs[abs_corr_coeffs > 0.4]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 392,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "## Drop columns with less than 0.4 correlation with SalePrice\n",
+    "transform_df = transform_df.drop(abs_corr_coeffs[abs_corr_coeffs < 0.4].index, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Which categorical columns should we keep?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 393,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "## Create a list of column names from documentation that are *meant* to be categorical\n",
+    "nominal_features = [\"PID\", \"MS SubClass\", \"MS Zoning\", \"Street\", \"Alley\", \"Land Contour\", \"Lot Config\", \"Neighborhood\", \n",
+    "                    \"Condition 1\", \"Condition 2\", \"Bldg Type\", \"House Style\", \"Roof Style\", \"Roof Matl\", \"Exterior 1st\", \n",
+    "                    \"Exterior 2nd\", \"Mas Vnr Type\", \"Foundation\", \"Heating\", \"Central Air\", \"Garage Type\", \n",
+    "                    \"Misc Feature\", \"Sale Type\", \"Sale Condition\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- Which columns are currently numerical but need to be encoded as categorical instead (because the numbers don't have any semantic meaning)?\n",
+    "- If a categorical column has hundreds of unique values (or categories), should we keep it? When we dummy code this column, hundreds of columns will need to be added back to the data frame."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 394,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "## Which categorical columns have we still carried with us? We'll test these \n",
+    "transform_cat_cols = []\n",
+    "for col in nominal_features:\n",
+    "    if col in transform_df.columns:\n",
+    "        transform_cat_cols.append(col)\n",
+    "\n",
+    "## How many unique values in each categorical column?\n",
+    "uniqueness_counts = transform_df[transform_cat_cols].apply(lambda col: len(col.value_counts())).sort_values()\n",
+    "## Arbitrary cutoff of 10 unique values (worth experimenting)\n",
+    "drop_nonuniq_cols = uniqueness_counts[uniqueness_counts > 10].index\n",
+    "transform_df = transform_df.drop(drop_nonuniq_cols, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 395,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "## Select just the remaining text columns and convert to categorical\n",
+    "text_cols = transform_df.select_dtypes(include=['object'])\n",
+    "for col in text_cols:\n",
+    "    transform_df[col] = transform_df[col].astype('category')\n",
+    "    \n",
+    "## Create dummy columns and add back to the dataframe!\n",
+    "transform_df = pd.concat([\n",
+    "    transform_df, \n",
+    "    pd.get_dummies(transform_df.select_dtypes(include=['category']))\n",
+    "], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Update `select_features()`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 426,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[26865.660668584878, 36061.238855571239, 24743.661620667855, 27948.830118737143]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "28904.84781589028"
+      ]
+     },
+     "execution_count": 426,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def transform_features(df):\n",
+    "    num_missing = df.isnull().sum()\n",
+    "    drop_missing_cols = num_missing[(num_missing > len(df)/20)].sort_values()\n",
+    "    df = df.drop(drop_missing_cols.index, axis=1)\n",
+    "    \n",
+    "    text_mv_counts = df.select_dtypes(include=['object']).isnull().sum().sort_values(ascending=False)\n",
+    "    drop_missing_cols_2 = text_mv_counts[text_mv_counts > 0]\n",
+    "    df = df.drop(drop_missing_cols_2.index, axis=1)\n",
+    "    \n",
+    "    num_missing = df.select_dtypes(include=['int', 'float']).isnull().sum()\n",
+    "    fixable_numeric_cols = num_missing[(num_missing < len(df)/20) & (num_missing > 0)].sort_values()\n",
+    "    replacement_values_dict = df[fixable_numeric_cols.index].mode().to_dict(orient='records')[0]\n",
+    "    df = df.fillna(replacement_values_dict)\n",
+    "    \n",
+    "    years_sold = df['Yr Sold'] - df['Year Built']\n",
+    "    years_since_remod = df['Yr Sold'] - df['Year Remod/Add']\n",
+    "    df['Years Before Sale'] = years_sold\n",
+    "    df['Years Since Remod'] = years_since_remod\n",
+    "    df = df.drop([1702, 2180, 2181], axis=0)\n",
+    "\n",
+    "    df = df.drop([\"PID\", \"Order\", \"Mo Sold\", \"Sale Condition\", \"Sale Type\", \"Year Built\", \"Year Remod/Add\"], axis=1)\n",
+    "    return df\n",
+    "\n",
+    "def select_features(df, coeff_threshold=0.4, uniq_threshold=10):\n",
+    "    numerical_df = df.select_dtypes(include=['int', 'float'])\n",
+    "    abs_corr_coeffs = numerical_df.corr()['SalePrice'].abs().sort_values()\n",
+    "    df = df.drop(abs_corr_coeffs[abs_corr_coeffs < coeff_threshold].index, axis=1)\n",
+    "    \n",
+    "    nominal_features = [\"PID\", \"MS SubClass\", \"MS Zoning\", \"Street\", \"Alley\", \"Land Contour\", \"Lot Config\", \"Neighborhood\", \n",
+    "                    \"Condition 1\", \"Condition 2\", \"Bldg Type\", \"House Style\", \"Roof Style\", \"Roof Matl\", \"Exterior 1st\", \n",
+    "                    \"Exterior 2nd\", \"Mas Vnr Type\", \"Foundation\", \"Heating\", \"Central Air\", \"Garage Type\", \n",
+    "                    \"Misc Feature\", \"Sale Type\", \"Sale Condition\"]\n",
+    "    \n",
+    "    transform_cat_cols = []\n",
+    "    for col in nominal_features:\n",
+    "        if col in df.columns:\n",
+    "            transform_cat_cols.append(col)\n",
+    "\n",
+    "    uniqueness_counts = df[transform_cat_cols].apply(lambda col: len(col.value_counts())).sort_values()\n",
+    "    drop_nonuniq_cols = uniqueness_counts[uniqueness_counts > 10].index\n",
+    "    df = df.drop(drop_nonuniq_cols, axis=1)\n",
+    "    \n",
+    "    text_cols = df.select_dtypes(include=['object'])\n",
+    "    for col in text_cols:\n",
+    "        df[col] = df[col].astype('category')\n",
+    "    df = pd.concat([df, pd.get_dummies(df.select_dtypes(include=['category']))], axis=1)\n",
+    "    \n",
+    "    return df\n",
+    "\n",
+    "def train_and_test(df, k=0):\n",
+    "    numeric_df = df.select_dtypes(include=['integer', 'float'])\n",
+    "    features = numeric_df.columns.drop(\"SalePrice\")\n",
+    "    lr = linear_model.LinearRegression()\n",
+    "    \n",
+    "    if k == 0:\n",
+    "        train = df[:1460]\n",
+    "        test = df[1460:]\n",
+    "\n",
+    "        lr.fit(train[features], train[\"SalePrice\"])\n",
+    "        predictions = lr.predict(test[features])\n",
+    "        mse = mean_squared_error(test[\"SalePrice\"], predictions)\n",
+    "        rmse = np.sqrt(mse)\n",
+    "\n",
+    "        return rmse\n",
+    "    \n",
+    "    if k == 1:\n",
+    "        # Shuffle *all* rows (frac=1) of `df`. NOTE: `shuffled_df` is not used below; train/test are sliced from `df`.\n",
+    "        shuffled_df = df.sample(frac=1, )\n",
+    "        train = df[:1460]\n",
+    "        test = df[1460:]\n",
+    "        \n",
+    "        lr.fit(train[features], train[\"SalePrice\"])\n",
+    "        predictions_one = lr.predict(test[features])        \n",
+    "        \n",
+    "        mse_one = mean_squared_error(test[\"SalePrice\"], predictions_one)\n",
+    "        rmse_one = np.sqrt(mse_one)\n",
+    "        \n",
+    "        lr.fit(test[features], test[\"SalePrice\"])\n",
+    "        predictions_two = lr.predict(train[features])        \n",
+    "       \n",
+    "        mse_two = mean_squared_error(train[\"SalePrice\"], predictions_two)\n",
+    "        rmse_two = np.sqrt(mse_two)\n",
+    "        \n",
+    "        avg_rmse = np.mean([rmse_one, rmse_two])\n",
+    "        print(rmse_one)\n",
+    "        print(rmse_two)\n",
+    "        return avg_rmse\n",
+    "    else:\n",
+    "        kf = KFold(n_splits=k, shuffle=True)\n",
+    "        rmse_values = []\n",
+    "        for train_index, test_index, in kf.split(df):\n",
+    "            train = df.iloc[train_index]\n",
+    "            test = df.iloc[test_index]\n",
+    "            lr.fit(train[features], train[\"SalePrice\"])\n",
+    "            predictions = lr.predict(test[features])\n",
+    "            mse = mean_squared_error(test[\"SalePrice\"], predictions)\n",
+    "            rmse = np.sqrt(mse)\n",
+    "            rmse_values.append(rmse)\n",
+    "        print(rmse_values)\n",
+    "        avg_rmse = np.mean(rmse_values)\n",
+    "        return avg_rmse\n",
+    "\n",
+    "df = pd.read_csv(\"AmesHousing.tsv\", delimiter=\"\\t\")\n",
+    "transform_df = transform_features(df)\n",
+    "filtered_df = select_features(transform_df)\n",
+    "rmse = train_and_test(filtered_df, k=4)\n",
+    "\n",
+    "rmse"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}