{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Introduction To The Data Set" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "pd.options.display.max_columns = 99" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "cols = ['symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration', 'num-of-doors', 'body-style', \n", " 'drive-wheels', 'engine-location', 'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type', \n", " 'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke', 'compression-rate', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg', 'price']\n", "cars = pd.read_csv('imports-85.data', names=cols)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
symbolingnormalized-lossesmakefuel-typeaspirationnum-of-doorsbody-styledrive-wheelsengine-locationwheel-baselengthwidthheightcurb-weightengine-typenum-of-cylindersengine-sizefuel-systemborestrokecompression-ratehorsepowerpeak-rpmcity-mpghighway-mpgprice
03?alfa-romerogasstdtwoconvertiblerwdfront88.6168.864.148.82548dohcfour130mpfi3.472.689.001115000212713495
13?alfa-romerogasstdtwoconvertiblerwdfront88.6168.864.148.82548dohcfour130mpfi3.472.689.001115000212716500
21?alfa-romerogasstdtwohatchbackrwdfront94.5171.265.552.42823ohcvsix152mpfi2.683.479.001545000192616500
32164audigasstdfoursedanfwdfront99.8176.666.254.32337ohcfour109mpfi3.193.4010.001025500243013950
42164audigasstdfoursedan4wdfront99.4176.666.454.32824ohcfive136mpfi3.193.408.001155500182217450
52?audigasstdtwosedanfwdfront99.8177.366.353.12507ohcfive136mpfi3.193.408.501105500192515250
61158audigasstdfoursedanfwdfront105.8192.771.455.72844ohcfive136mpfi3.193.408.501105500192517710
71?audigasstdfourwagonfwdfront105.8192.771.455.72954ohcfive136mpfi3.193.408.501105500192518920
81158audigasturbofoursedanfwdfront105.8192.771.455.93086ohcfive131mpfi3.133.408.301405500172023875
90?audigasturbotwohatchback4wdfront99.5178.267.952.03053ohcfive131mpfi3.133.407.0016055001622?
102192bmwgasstdtwosedanrwdfront101.2176.864.854.32395ohcfour108mpfi3.502.808.801015800232916430
110192bmwgasstdfoursedanrwdfront101.2176.864.854.32395ohcfour108mpfi3.502.808.801015800232916925
120188bmwgasstdtwosedanrwdfront101.2176.864.854.32710ohcsix164mpfi3.313.199.001214250212820970
130188bmwgasstdfoursedanrwdfront101.2176.864.854.32765ohcsix164mpfi3.313.199.001214250212821105
141?bmwgasstdfoursedanrwdfront103.5189.066.955.73055ohcsix164mpfi3.313.199.001214250202524565
150?bmwgasstdfoursedanrwdfront103.5189.066.955.73230ohcsix209mpfi3.623.398.001825400162230760
160?bmwgasstdtwosedanrwdfront103.5193.867.953.73380ohcsix209mpfi3.623.398.001825400162241315
170?bmwgasstdfoursedanrwdfront110.0197.070.956.33505ohcsix209mpfi3.623.398.001825400152036880
182121chevroletgasstdtwohatchbackfwdfront88.4141.160.353.21488lthree612bbl2.913.039.5048510047535151
19198chevroletgasstdtwohatchbackfwdfront94.5155.963.652.01874ohcfour902bbl3.033.119.6070540038436295
20081chevroletgasstdfoursedanfwdfront94.5158.863.652.01909ohcfour902bbl3.033.119.6070540038436575
211118dodgegasstdtwohatchbackfwdfront93.7157.363.850.81876ohcfour902bbl2.973.239.4168550037415572
221118dodgegasstdtwohatchbackfwdfront93.7157.363.850.81876ohcfour902bbl2.973.239.4068550031386377
231118dodgegasturbotwohatchbackfwdfront93.7157.363.850.82128ohcfour98mpfi3.033.397.60102550024307957
241148dodgegasstdfourhatchbackfwdfront93.7157.363.850.61967ohcfour902bbl2.973.239.4068550031386229
251148dodgegasstdfoursedanfwdfront93.7157.363.850.61989ohcfour902bbl2.973.239.4068550031386692
261148dodgegasstdfoursedanfwdfront93.7157.363.850.61989ohcfour902bbl2.973.239.4068550031387609
271148dodgegasturbo?sedanfwdfront93.7157.363.850.62191ohcfour98mpfi3.033.397.60102550024308558
28-1110dodgegasstdfourwagonfwdfront103.3174.664.659.82535ohcfour1222bbl3.343.468.5088500024308921
293145dodgegasturbotwohatchbackfwdfront95.9173.266.350.22811ohcfour156mfi3.603.907.001455000192412964
.................................................................................
175-165toyotagasstdfourhatchbackfwdfront102.4175.666.553.92414ohcfour122mpfi3.313.548.7092420027329988
176-165toyotagasstdfoursedanfwdfront102.4175.666.554.92414ohcfour122mpfi3.313.548.70924200273210898
177-165toyotagasstdfourhatchbackfwdfront102.4175.666.553.92458ohcfour122mpfi3.313.548.70924200273211248
1783197toyotagasstdtwohatchbackrwdfront102.9183.567.752.02976dohcsix171mpfi3.273.359.301615200202416558
1793197toyotagasstdtwohatchbackrwdfront102.9183.567.752.03016dohcsix171mpfi3.273.359.301615200192415998
180-190toyotagasstdfoursedanrwdfront104.5187.866.554.13131dohcsix171mpfi3.273.359.201565200202415690
181-1?toyotagasstdfourwagonrwdfront104.5187.866.554.13151dohcsix161mpfi3.273.359.201565200192415750
1822122volkswagendieselstdtwosedanfwdfront97.3171.765.555.72261ohcfour97idi3.013.4023.0052480037467775
1832122volkswagengasstdtwosedanfwdfront97.3171.765.555.72209ohcfour109mpfi3.193.409.0085525027347975
184294volkswagendieselstdfoursedanfwdfront97.3171.765.555.72264ohcfour97idi3.013.4023.0052480037467995
185294volkswagengasstdfoursedanfwdfront97.3171.765.555.72212ohcfour109mpfi3.193.409.0085525027348195
186294volkswagengasstdfoursedanfwdfront97.3171.765.555.72275ohcfour109mpfi3.193.409.0085525027348495
187294volkswagendieselturbofoursedanfwdfront97.3171.765.555.72319ohcfour97idi3.013.4023.0068450037429495
188294volkswagengasstdfoursedanfwdfront97.3171.765.555.72300ohcfour109mpfi3.193.4010.00100550026329995
1893?volkswagengasstdtwoconvertiblefwdfront94.5159.364.255.62254ohcfour109mpfi3.193.408.50905500242911595
1903256volkswagengasstdtwohatchbackfwdfront94.5165.764.051.42221ohcfour109mpfi3.193.408.5090550024299980
1910?volkswagengasstdfoursedanfwdfront100.4180.266.955.12661ohcfive136mpfi3.193.408.501105500192413295
1920?volkswagendieselturbofoursedanfwdfront100.4180.266.955.12579ohcfour97idi3.013.4023.00684500333813845
1930?volkswagengasstdfourwagonfwdfront100.4183.166.955.12563ohcfour109mpfi3.193.409.00885500253112290
194-2103volvogasstdfoursedanrwdfront104.3188.867.256.22912ohcfour141mpfi3.783.159.501145400232812940
195-174volvogasstdfourwagonrwdfront104.3188.867.257.53034ohcfour141mpfi3.783.159.501145400232813415
196-2103volvogasstdfoursedanrwdfront104.3188.867.256.22935ohcfour141mpfi3.783.159.501145400242815985
197-174volvogasstdfourwagonrwdfront104.3188.867.257.53042ohcfour141mpfi3.783.159.501145400242816515
198-2103volvogasturbofoursedanrwdfront104.3188.867.256.23045ohcfour130mpfi3.623.157.501625100172218420
199-174volvogasturbofourwagonrwdfront104.3188.867.257.53157ohcfour130mpfi3.623.157.501625100172218950
200-195volvogasstdfoursedanrwdfront109.1188.868.955.52952ohcfour141mpfi3.783.159.501145400232816845
201-195volvogasturbofoursedanrwdfront109.1188.868.855.53049ohcfour141mpfi3.783.158.701605300192519045
202-195volvogasstdfoursedanrwdfront109.1188.868.955.53012ohcvsix173mpfi3.582.878.801345500182321485
203-195volvodieselturbofoursedanrwdfront109.1188.868.955.53217ohcsix145idi3.013.4023.001064800262722470
204-195volvogasturbofoursedanrwdfront109.1188.868.955.53062ohcfour141mpfi3.783.159.501145400192522625
\n", "

205 rows × 26 columns

\n", "
" ], "text/plain": [ " symboling normalized-losses make fuel-type aspiration \\\n", "0 3 ? alfa-romero gas std \n", "1 3 ? alfa-romero gas std \n", "2 1 ? alfa-romero gas std \n", "3 2 164 audi gas std \n", "4 2 164 audi gas std \n", "5 2 ? audi gas std \n", "6 1 158 audi gas std \n", "7 1 ? audi gas std \n", "8 1 158 audi gas turbo \n", "9 0 ? audi gas turbo \n", "10 2 192 bmw gas std \n", "11 0 192 bmw gas std \n", "12 0 188 bmw gas std \n", "13 0 188 bmw gas std \n", "14 1 ? bmw gas std \n", "15 0 ? bmw gas std \n", "16 0 ? bmw gas std \n", "17 0 ? bmw gas std \n", "18 2 121 chevrolet gas std \n", "19 1 98 chevrolet gas std \n", "20 0 81 chevrolet gas std \n", "21 1 118 dodge gas std \n", "22 1 118 dodge gas std \n", "23 1 118 dodge gas turbo \n", "24 1 148 dodge gas std \n", "25 1 148 dodge gas std \n", "26 1 148 dodge gas std \n", "27 1 148 dodge gas turbo \n", "28 -1 110 dodge gas std \n", "29 3 145 dodge gas turbo \n", ".. ... ... ... ... ... \n", "175 -1 65 toyota gas std \n", "176 -1 65 toyota gas std \n", "177 -1 65 toyota gas std \n", "178 3 197 toyota gas std \n", "179 3 197 toyota gas std \n", "180 -1 90 toyota gas std \n", "181 -1 ? toyota gas std \n", "182 2 122 volkswagen diesel std \n", "183 2 122 volkswagen gas std \n", "184 2 94 volkswagen diesel std \n", "185 2 94 volkswagen gas std \n", "186 2 94 volkswagen gas std \n", "187 2 94 volkswagen diesel turbo \n", "188 2 94 volkswagen gas std \n", "189 3 ? volkswagen gas std \n", "190 3 256 volkswagen gas std \n", "191 0 ? volkswagen gas std \n", "192 0 ? volkswagen diesel turbo \n", "193 0 ? volkswagen gas std \n", "194 -2 103 volvo gas std \n", "195 -1 74 volvo gas std \n", "196 -2 103 volvo gas std \n", "197 -1 74 volvo gas std \n", "198 -2 103 volvo gas turbo \n", "199 -1 74 volvo gas turbo \n", "200 -1 95 volvo gas std \n", "201 -1 95 volvo gas turbo \n", "202 -1 95 volvo gas std \n", "203 -1 95 volvo diesel turbo \n", "204 -1 95 volvo gas turbo \n", "\n", " num-of-doors body-style drive-wheels engine-location wheel-base \\\n", "0 two convertible rwd front 88.6 \n", "1 two convertible rwd front 88.6 \n", "2 two hatchback rwd front 94.5 \n", "3 four sedan fwd front 99.8 \n", "4 four sedan 4wd front 99.4 \n", "5 two sedan fwd front 99.8 \n", "6 four sedan fwd front 105.8 \n", "7 four wagon fwd front 105.8 \n", "8 four sedan fwd front 105.8 \n", "9 two hatchback 4wd front 99.5 \n", "10 two sedan rwd front 101.2 \n", "11 four sedan rwd front 101.2 \n", "12 two sedan rwd front 101.2 \n", "13 four sedan rwd front 101.2 \n", "14 four sedan rwd front 103.5 \n", "15 four sedan rwd front 103.5 \n", "16 two sedan rwd front 103.5 \n", "17 four sedan rwd front 110.0 \n", "18 two hatchback fwd front 88.4 \n", "19 two hatchback fwd front 94.5 \n", "20 four sedan fwd front 94.5 \n", "21 two hatchback fwd front 93.7 \n", "22 two hatchback fwd front 93.7 \n", "23 two hatchback fwd front 93.7 \n", "24 four hatchback fwd front 93.7 \n", "25 four sedan fwd front 93.7 \n", "26 four sedan fwd front 93.7 \n", "27 ? sedan fwd front 93.7 \n", "28 four wagon fwd front 103.3 \n", "29 two hatchback fwd front 95.9 \n", ".. ... ... ... ... ... \n", "175 four hatchback fwd front 102.4 \n", "176 four sedan fwd front 102.4 \n", "177 four hatchback fwd front 102.4 \n", "178 two hatchback rwd front 102.9 \n", "179 two hatchback rwd front 102.9 \n", "180 four sedan rwd front 104.5 \n", "181 four wagon rwd front 104.5 \n", "182 two sedan fwd front 97.3 \n", "183 two sedan fwd front 97.3 \n", "184 four sedan fwd front 97.3 \n", "185 four sedan fwd front 97.3 \n", "186 four sedan fwd front 97.3 \n", "187 four sedan fwd front 97.3 \n", "188 four sedan fwd front 97.3 \n", "189 two convertible fwd front 94.5 \n", "190 two hatchback fwd front 94.5 \n", "191 four sedan fwd front 100.4 \n", "192 four sedan fwd front 100.4 \n", "193 four wagon fwd front 100.4 \n", "194 four sedan rwd front 104.3 \n", "195 four wagon rwd front 104.3 \n", "196 four sedan rwd front 104.3 \n", "197 four wagon rwd front 104.3 \n", "198 four sedan rwd front 104.3 \n", "199 four wagon rwd front 104.3 \n", "200 four sedan rwd front 109.1 \n", "201 four sedan rwd front 109.1 \n", "202 four sedan rwd front 109.1 \n", "203 four sedan rwd front 109.1 \n", "204 four sedan rwd front 109.1 \n", "\n", " length width height curb-weight engine-type num-of-cylinders \\\n", "0 168.8 64.1 48.8 2548 dohc four \n", "1 168.8 64.1 48.8 2548 dohc four \n", "2 171.2 65.5 52.4 2823 ohcv six \n", "3 176.6 66.2 54.3 2337 ohc four \n", "4 176.6 66.4 54.3 2824 ohc five \n", "5 177.3 66.3 53.1 2507 ohc five \n", "6 192.7 71.4 55.7 2844 ohc five \n", "7 192.7 71.4 55.7 2954 ohc five \n", "8 192.7 71.4 55.9 3086 ohc five \n", "9 178.2 67.9 52.0 3053 ohc five \n", "10 176.8 64.8 54.3 2395 ohc four \n", "11 176.8 64.8 54.3 2395 ohc four \n", "12 176.8 64.8 54.3 2710 ohc six \n", "13 176.8 64.8 54.3 2765 ohc six \n", "14 189.0 66.9 55.7 3055 ohc six \n", "15 189.0 66.9 55.7 3230 ohc six \n", "16 193.8 67.9 53.7 3380 ohc six \n", "17 197.0 70.9 56.3 3505 ohc six \n", "18 141.1 60.3 53.2 1488 l three \n", "19 155.9 63.6 52.0 1874 ohc four \n", "20 158.8 63.6 52.0 1909 ohc four \n", "21 157.3 63.8 50.8 1876 ohc four \n", "22 157.3 63.8 50.8 1876 ohc four \n", "23 157.3 63.8 50.8 2128 ohc four \n", "24 157.3 63.8 50.6 1967 ohc four \n", "25 157.3 63.8 50.6 1989 ohc four \n", "26 157.3 63.8 50.6 1989 ohc four \n", "27 157.3 63.8 50.6 2191 ohc four \n", "28 174.6 64.6 59.8 2535 ohc four \n", "29 173.2 66.3 50.2 2811 ohc four \n", ".. ... ... ... ... ... ... \n", "175 175.6 66.5 53.9 2414 ohc four \n", "176 175.6 66.5 54.9 2414 ohc four \n", "177 175.6 66.5 53.9 2458 ohc four \n", "178 183.5 67.7 52.0 2976 dohc six \n", "179 183.5 67.7 52.0 3016 dohc six \n", "180 187.8 66.5 54.1 3131 dohc six \n", "181 187.8 66.5 54.1 3151 dohc six \n", "182 171.7 65.5 55.7 2261 ohc four \n", "183 171.7 65.5 55.7 2209 ohc four \n", "184 171.7 65.5 55.7 2264 ohc four \n", "185 171.7 65.5 55.7 2212 ohc four \n", "186 171.7 65.5 55.7 2275 ohc four \n", "187 171.7 65.5 55.7 2319 ohc four \n", "188 171.7 65.5 55.7 2300 ohc four \n", "189 159.3 64.2 55.6 2254 ohc four \n", "190 165.7 64.0 51.4 2221 ohc four \n", "191 180.2 66.9 55.1 2661 ohc five \n", "192 180.2 66.9 55.1 2579 ohc four \n", "193 183.1 66.9 55.1 2563 ohc four \n", "194 188.8 67.2 56.2 2912 ohc four \n", "195 188.8 67.2 57.5 3034 ohc four \n", "196 188.8 67.2 56.2 2935 ohc four \n", "197 188.8 67.2 57.5 3042 ohc four \n", "198 188.8 67.2 56.2 3045 ohc four \n", "199 188.8 67.2 57.5 3157 ohc four \n", "200 188.8 68.9 55.5 2952 ohc four \n", "201 188.8 68.8 55.5 3049 ohc four \n", "202 188.8 68.9 55.5 3012 ohcv six \n", "203 188.8 68.9 55.5 3217 ohc six \n", "204 188.8 68.9 55.5 3062 ohc four \n", "\n", " engine-size fuel-system bore stroke compression-rate horsepower \\\n", "0 130 mpfi 3.47 2.68 9.00 111 \n", "1 130 mpfi 3.47 2.68 9.00 111 \n", "2 152 mpfi 2.68 3.47 9.00 154 \n", "3 109 mpfi 3.19 3.40 10.00 102 \n", "4 136 mpfi 3.19 3.40 8.00 115 \n", "5 136 mpfi 3.19 3.40 8.50 110 \n", "6 136 mpfi 3.19 3.40 8.50 110 \n", "7 136 mpfi 3.19 3.40 8.50 110 \n", "8 131 mpfi 3.13 3.40 8.30 140 \n", "9 131 mpfi 3.13 3.40 7.00 160 \n", "10 108 mpfi 3.50 2.80 8.80 101 \n", "11 108 mpfi 3.50 2.80 8.80 101 \n", "12 164 mpfi 3.31 3.19 9.00 121 \n", "13 164 mpfi 3.31 3.19 9.00 121 \n", "14 164 mpfi 3.31 3.19 9.00 121 \n", "15 209 mpfi 3.62 3.39 8.00 182 \n", "16 209 mpfi 3.62 3.39 8.00 182 \n", "17 209 mpfi 3.62 3.39 8.00 182 \n", "18 61 2bbl 2.91 3.03 9.50 48 \n", "19 90 2bbl 3.03 3.11 9.60 70 \n", "20 90 2bbl 3.03 3.11 9.60 70 \n", "21 90 2bbl 2.97 3.23 9.41 68 \n", "22 90 2bbl 2.97 3.23 9.40 68 \n", "23 98 mpfi 3.03 3.39 7.60 102 \n", "24 90 2bbl 2.97 3.23 9.40 68 \n", "25 90 2bbl 2.97 3.23 9.40 68 \n", "26 90 2bbl 2.97 3.23 9.40 68 \n", "27 98 mpfi 3.03 3.39 7.60 102 \n", "28 122 2bbl 3.34 3.46 8.50 88 \n", "29 156 mfi 3.60 3.90 7.00 145 \n", ".. ... ... ... ... ... ... \n", "175 122 mpfi 3.31 3.54 8.70 92 \n", "176 122 mpfi 3.31 3.54 8.70 92 \n", "177 122 mpfi 3.31 3.54 8.70 92 \n", "178 171 mpfi 3.27 3.35 9.30 161 \n", "179 171 mpfi 3.27 3.35 9.30 161 \n", "180 171 mpfi 3.27 3.35 9.20 156 \n", "181 161 mpfi 3.27 3.35 9.20 156 \n", "182 97 idi 3.01 3.40 23.00 52 \n", "183 109 mpfi 3.19 3.40 9.00 85 \n", "184 97 idi 3.01 3.40 23.00 52 \n", "185 109 mpfi 3.19 3.40 9.00 85 \n", "186 109 mpfi 3.19 3.40 9.00 85 \n", "187 97 idi 3.01 3.40 23.00 68 \n", "188 109 mpfi 3.19 3.40 10.00 100 \n", "189 109 mpfi 3.19 3.40 8.50 90 \n", "190 109 mpfi 3.19 3.40 8.50 90 \n", "191 136 mpfi 3.19 3.40 8.50 110 \n", "192 97 idi 3.01 3.40 23.00 68 \n", "193 109 mpfi 3.19 3.40 9.00 88 \n", "194 141 mpfi 3.78 3.15 9.50 114 \n", "195 141 mpfi 3.78 3.15 9.50 114 \n", "196 141 mpfi 3.78 3.15 9.50 114 \n", "197 141 mpfi 3.78 3.15 9.50 114 \n", "198 130 mpfi 3.62 3.15 7.50 162 \n", "199 130 mpfi 3.62 3.15 7.50 162 \n", "200 141 mpfi 3.78 3.15 9.50 114 \n", "201 141 mpfi 3.78 3.15 8.70 160 \n", "202 173 mpfi 3.58 2.87 8.80 134 \n", "203 145 idi 3.01 3.40 23.00 106 \n", "204 141 mpfi 3.78 3.15 9.50 114 \n", "\n", " peak-rpm city-mpg highway-mpg price \n", "0 5000 21 27 13495 \n", "1 5000 21 27 16500 \n", "2 5000 19 26 16500 \n", "3 5500 24 30 13950 \n", "4 5500 18 22 17450 \n", "5 5500 19 25 15250 \n", "6 5500 19 25 17710 \n", "7 5500 19 25 18920 \n", "8 5500 17 20 23875 \n", "9 5500 16 22 ? \n", "10 5800 23 29 16430 \n", "11 5800 23 29 16925 \n", "12 4250 21 28 20970 \n", "13 4250 21 28 21105 \n", "14 4250 20 25 24565 \n", "15 5400 16 22 30760 \n", "16 5400 16 22 41315 \n", "17 5400 15 20 36880 \n", "18 5100 47 53 5151 \n", "19 5400 38 43 6295 \n", "20 5400 38 43 6575 \n", "21 5500 37 41 5572 \n", "22 5500 31 38 6377 \n", "23 5500 24 30 7957 \n", "24 5500 31 38 6229 \n", "25 5500 31 38 6692 \n", "26 5500 31 38 7609 \n", "27 5500 24 30 8558 \n", "28 5000 24 30 8921 \n", "29 5000 19 24 12964 \n", ".. ... ... ... ... \n", "175 4200 27 32 9988 \n", "176 4200 27 32 10898 \n", "177 4200 27 32 11248 \n", "178 5200 20 24 16558 \n", "179 5200 19 24 15998 \n", "180 5200 20 24 15690 \n", "181 5200 19 24 15750 \n", "182 4800 37 46 7775 \n", "183 5250 27 34 7975 \n", "184 4800 37 46 7995 \n", "185 5250 27 34 8195 \n", "186 5250 27 34 8495 \n", "187 4500 37 42 9495 \n", "188 5500 26 32 9995 \n", "189 5500 24 29 11595 \n", "190 5500 24 29 9980 \n", "191 5500 19 24 13295 \n", "192 4500 33 38 13845 \n", "193 5500 25 31 12290 \n", "194 5400 23 28 12940 \n", "195 5400 23 28 13415 \n", "196 5400 24 28 15985 \n", "197 5400 24 28 16515 \n", "198 5100 17 22 18420 \n", "199 5100 17 22 18950 \n", "200 5400 23 28 16845 \n", "201 5300 19 25 19045 \n", "202 5500 18 23 21485 \n", "203 4800 26 27 22470 \n", "204 5400 19 25 22625 \n", "\n", "[205 rows x 26 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cars" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "# Select only the columns with continuous values from - https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.names\n", "continuous_values_cols = ['normalized-losses', 'wheel-base', 'length', 'width', 'height', 'curb-weight', 'bore', 'stroke', 'compression-rate', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg', 'price']\n", "numeric_cars = cars[continuous_values_cols]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
normalized-losseswheel-baselengthwidthheightcurb-weightborestrokecompression-ratehorsepowerpeak-rpmcity-mpghighway-mpgprice
0?88.6168.864.148.825483.472.689.01115000212713495
1?88.6168.864.148.825483.472.689.01115000212716500
2?94.5171.265.552.428232.683.479.01545000192616500
316499.8176.666.254.323373.193.4010.01025500243013950
416499.4176.666.454.328243.193.408.01155500182217450
\n", "
" ], "text/plain": [ " normalized-losses wheel-base length width height curb-weight bore \\\n", "0 ? 88.6 168.8 64.1 48.8 2548 3.47 \n", "1 ? 88.6 168.8 64.1 48.8 2548 3.47 \n", "2 ? 94.5 171.2 65.5 52.4 2823 2.68 \n", "3 164 99.8 176.6 66.2 54.3 2337 3.19 \n", "4 164 99.4 176.6 66.4 54.3 2824 3.19 \n", "\n", " stroke compression-rate horsepower peak-rpm city-mpg highway-mpg price \n", "0 2.68 9.0 111 5000 21 27 13495 \n", "1 2.68 9.0 111 5000 21 27 16500 \n", "2 3.47 9.0 154 5000 19 26 16500 \n", "3 3.40 10.0 102 5500 24 30 13950 \n", "4 3.40 8.0 115 5500 18 22 17450 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "numeric_cars.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data Cleaning" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
normalized-losseswheel-baselengthwidthheightcurb-weightborestrokecompression-ratehorsepowerpeak-rpmcity-mpghighway-mpgprice
0NaN88.6168.864.148.825483.472.689.001115000212713495
1NaN88.6168.864.148.825483.472.689.001115000212716500
2NaN94.5171.265.552.428232.683.479.001545000192616500
316499.8176.666.254.323373.193.4010.001025500243013950
416499.4176.666.454.328243.193.408.001155500182217450
5NaN99.8177.366.353.125073.193.408.501105500192515250
6158105.8192.771.455.728443.193.408.501105500192517710
7NaN105.8192.771.455.729543.193.408.501105500192518920
8158105.8192.771.455.930863.133.408.301405500172023875
9NaN99.5178.267.952.030533.133.407.0016055001622NaN
10192101.2176.864.854.323953.502.808.801015800232916430
11192101.2176.864.854.323953.502.808.801015800232916925
12188101.2176.864.854.327103.313.199.001214250212820970
13188101.2176.864.854.327653.313.199.001214250212821105
14NaN103.5189.066.955.730553.313.199.001214250202524565
15NaN103.5189.066.955.732303.623.398.001825400162230760
16NaN103.5193.867.953.733803.623.398.001825400162241315
17NaN110.0197.070.956.335053.623.398.001825400152036880
1812188.4141.160.353.214882.913.039.5048510047535151
199894.5155.963.652.018743.033.119.6070540038436295
208194.5158.863.652.019093.033.119.6070540038436575
2111893.7157.363.850.818762.973.239.4168550037415572
2211893.7157.363.850.818762.973.239.4068550031386377
2311893.7157.363.850.821283.033.397.60102550024307957
2414893.7157.363.850.619672.973.239.4068550031386229
2514893.7157.363.850.619892.973.239.4068550031386692
2614893.7157.363.850.619892.973.239.4068550031387609
2714893.7157.363.850.621913.033.397.60102550024308558
28110103.3174.664.659.825353.343.468.5088500024308921
2914595.9173.266.350.228113.603.907.001455000192412964
.............................................
17565102.4175.666.553.924143.313.548.7092420027329988
17665102.4175.666.554.924143.313.548.70924200273210898
17765102.4175.666.553.924583.313.548.70924200273211248
178197102.9183.567.752.029763.273.359.301615200202416558
179197102.9183.567.752.030163.273.359.301615200192415998
18090104.5187.866.554.131313.273.359.201565200202415690
181NaN104.5187.866.554.131513.273.359.201565200192415750
18212297.3171.765.555.722613.013.4023.0052480037467775
18312297.3171.765.555.722093.193.409.0085525027347975
1849497.3171.765.555.722643.013.4023.0052480037467995
1859497.3171.765.555.722123.193.409.0085525027348195
1869497.3171.765.555.722753.193.409.0085525027348495
1879497.3171.765.555.723193.013.4023.0068450037429495
1889497.3171.765.555.723003.193.4010.00100550026329995
189NaN94.5159.364.255.622543.193.408.50905500242911595
19025694.5165.764.051.422213.193.408.5090550024299980
191NaN100.4180.266.955.126613.193.408.501105500192413295
192NaN100.4180.266.955.125793.013.4023.00684500333813845
193NaN100.4183.166.955.125633.193.409.00885500253112290
194103104.3188.867.256.229123.783.159.501145400232812940
19574104.3188.867.257.530343.783.159.501145400232813415
196103104.3188.867.256.229353.783.159.501145400242815985
19774104.3188.867.257.530423.783.159.501145400242816515
198103104.3188.867.256.230453.623.157.501625100172218420
19974104.3188.867.257.531573.623.157.501625100172218950
20095109.1188.868.955.529523.783.159.501145400232816845
20195109.1188.868.855.530493.783.158.701605300192519045
20295109.1188.868.955.530123.582.878.801345500182321485
20395109.1188.868.955.532173.013.4023.001064800262722470
20495109.1188.868.955.530623.783.159.501145400192522625
\n", "

205 rows × 14 columns

\n", "
" ], "text/plain": [ " normalized-losses wheel-base length width height curb-weight bore \\\n", "0 NaN 88.6 168.8 64.1 48.8 2548 3.47 \n", "1 NaN 88.6 168.8 64.1 48.8 2548 3.47 \n", "2 NaN 94.5 171.2 65.5 52.4 2823 2.68 \n", "3 164 99.8 176.6 66.2 54.3 2337 3.19 \n", "4 164 99.4 176.6 66.4 54.3 2824 3.19 \n", "5 NaN 99.8 177.3 66.3 53.1 2507 3.19 \n", "6 158 105.8 192.7 71.4 55.7 2844 3.19 \n", "7 NaN 105.8 192.7 71.4 55.7 2954 3.19 \n", "8 158 105.8 192.7 71.4 55.9 3086 3.13 \n", "9 NaN 99.5 178.2 67.9 52.0 3053 3.13 \n", "10 192 101.2 176.8 64.8 54.3 2395 3.50 \n", "11 192 101.2 176.8 64.8 54.3 2395 3.50 \n", "12 188 101.2 176.8 64.8 54.3 2710 3.31 \n", "13 188 101.2 176.8 64.8 54.3 2765 3.31 \n", "14 NaN 103.5 189.0 66.9 55.7 3055 3.31 \n", "15 NaN 103.5 189.0 66.9 55.7 3230 3.62 \n", "16 NaN 103.5 193.8 67.9 53.7 3380 3.62 \n", "17 NaN 110.0 197.0 70.9 56.3 3505 3.62 \n", "18 121 88.4 141.1 60.3 53.2 1488 2.91 \n", "19 98 94.5 155.9 63.6 52.0 1874 3.03 \n", "20 81 94.5 158.8 63.6 52.0 1909 3.03 \n", "21 118 93.7 157.3 63.8 50.8 1876 2.97 \n", "22 118 93.7 157.3 63.8 50.8 1876 2.97 \n", "23 118 93.7 157.3 63.8 50.8 2128 3.03 \n", "24 148 93.7 157.3 63.8 50.6 1967 2.97 \n", "25 148 93.7 157.3 63.8 50.6 1989 2.97 \n", "26 148 93.7 157.3 63.8 50.6 1989 2.97 \n", "27 148 93.7 157.3 63.8 50.6 2191 3.03 \n", "28 110 103.3 174.6 64.6 59.8 2535 3.34 \n", "29 145 95.9 173.2 66.3 50.2 2811 3.60 \n", ".. ... ... ... ... ... ... ... \n", "175 65 102.4 175.6 66.5 53.9 2414 3.31 \n", "176 65 102.4 175.6 66.5 54.9 2414 3.31 \n", "177 65 102.4 175.6 66.5 53.9 2458 3.31 \n", "178 197 102.9 183.5 67.7 52.0 2976 3.27 \n", "179 197 102.9 183.5 67.7 52.0 3016 3.27 \n", "180 90 104.5 187.8 66.5 54.1 3131 3.27 \n", "181 NaN 104.5 187.8 66.5 54.1 3151 3.27 \n", "182 122 97.3 171.7 65.5 55.7 2261 3.01 \n", "183 122 97.3 171.7 65.5 55.7 2209 3.19 \n", "184 94 97.3 171.7 65.5 55.7 2264 3.01 \n", "185 94 97.3 171.7 65.5 55.7 2212 3.19 \n", "186 94 97.3 171.7 65.5 55.7 2275 3.19 \n", "187 94 97.3 171.7 65.5 55.7 2319 3.01 \n", "188 94 97.3 171.7 65.5 55.7 2300 3.19 \n", "189 NaN 94.5 159.3 64.2 55.6 2254 3.19 \n", "190 256 94.5 165.7 64.0 51.4 2221 3.19 \n", "191 NaN 100.4 180.2 66.9 55.1 2661 3.19 \n", "192 NaN 100.4 180.2 66.9 55.1 2579 3.01 \n", "193 NaN 100.4 183.1 66.9 55.1 2563 3.19 \n", "194 103 104.3 188.8 67.2 56.2 2912 3.78 \n", "195 74 104.3 188.8 67.2 57.5 3034 3.78 \n", "196 103 104.3 188.8 67.2 56.2 2935 3.78 \n", "197 74 104.3 188.8 67.2 57.5 3042 3.78 \n", "198 103 104.3 188.8 67.2 56.2 3045 3.62 \n", "199 74 104.3 188.8 67.2 57.5 3157 3.62 \n", "200 95 109.1 188.8 68.9 55.5 2952 3.78 \n", "201 95 109.1 188.8 68.8 55.5 3049 3.78 \n", "202 95 109.1 188.8 68.9 55.5 3012 3.58 \n", "203 95 109.1 188.8 68.9 55.5 3217 3.01 \n", "204 95 109.1 188.8 68.9 55.5 3062 3.78 \n", "\n", " stroke compression-rate horsepower peak-rpm city-mpg highway-mpg price \n", "0 2.68 9.00 111 5000 21 27 13495 \n", "1 2.68 9.00 111 5000 21 27 16500 \n", "2 3.47 9.00 154 5000 19 26 16500 \n", "3 3.40 10.00 102 5500 24 30 13950 \n", "4 3.40 8.00 115 5500 18 22 17450 \n", "5 3.40 8.50 110 5500 19 25 15250 \n", "6 3.40 8.50 110 5500 19 25 17710 \n", "7 3.40 8.50 110 5500 19 25 18920 \n", "8 3.40 8.30 140 5500 17 20 23875 \n", "9 3.40 7.00 160 5500 16 22 NaN \n", "10 2.80 8.80 101 5800 23 29 16430 \n", "11 2.80 8.80 101 5800 23 29 16925 \n", "12 3.19 9.00 121 4250 21 28 20970 \n", "13 3.19 9.00 121 4250 21 28 21105 \n", "14 3.19 9.00 121 4250 20 25 24565 \n", "15 3.39 8.00 182 5400 16 22 30760 \n", "16 3.39 8.00 182 5400 16 22 41315 \n", "17 3.39 8.00 182 5400 15 20 36880 \n", "18 3.03 9.50 48 5100 47 53 5151 \n", "19 3.11 9.60 70 5400 38 43 6295 \n", "20 3.11 9.60 70 5400 38 43 6575 \n", "21 3.23 9.41 68 5500 37 41 5572 \n", "22 3.23 9.40 68 5500 31 38 6377 \n", "23 3.39 7.60 102 5500 24 30 7957 \n", "24 3.23 9.40 68 5500 31 38 6229 \n", "25 3.23 9.40 68 5500 31 38 6692 \n", "26 3.23 9.40 68 5500 31 38 7609 \n", "27 3.39 7.60 102 5500 24 30 8558 \n", "28 3.46 8.50 88 5000 24 30 8921 \n", "29 3.90 7.00 145 5000 19 24 12964 \n", ".. ... ... ... ... ... ... ... \n", "175 3.54 8.70 92 4200 27 32 9988 \n", "176 3.54 8.70 92 4200 27 32 10898 \n", "177 3.54 8.70 92 4200 27 32 11248 \n", "178 3.35 9.30 161 5200 20 24 16558 \n", "179 3.35 9.30 161 5200 19 24 15998 \n", "180 3.35 9.20 156 5200 20 24 15690 \n", "181 3.35 9.20 156 5200 19 24 15750 \n", "182 3.40 23.00 52 4800 37 46 7775 \n", "183 3.40 9.00 85 5250 27 34 7975 \n", "184 3.40 23.00 52 4800 37 46 7995 \n", "185 3.40 9.00 85 5250 27 34 8195 \n", "186 3.40 9.00 85 5250 27 34 8495 \n", "187 3.40 23.00 68 4500 37 42 9495 \n", "188 3.40 10.00 100 5500 26 32 9995 \n", "189 3.40 8.50 90 5500 24 29 11595 \n", "190 3.40 8.50 90 5500 24 29 9980 \n", "191 3.40 8.50 110 5500 19 24 13295 \n", "192 3.40 23.00 68 4500 33 38 13845 \n", "193 3.40 9.00 88 5500 25 31 12290 \n", "194 3.15 9.50 114 5400 23 28 12940 \n", "195 3.15 9.50 114 5400 23 28 13415 \n", "196 3.15 9.50 114 5400 24 28 15985 \n", "197 3.15 9.50 114 5400 24 28 16515 \n", "198 3.15 7.50 162 5100 17 22 18420 \n", "199 3.15 7.50 162 5100 17 22 18950 \n", "200 3.15 9.50 114 5400 23 28 16845 \n", "201 3.15 8.70 160 5300 19 25 19045 \n", "202 2.87 8.80 134 5500 18 23 21485 \n", "203 3.40 23.00 106 4800 26 27 22470 \n", "204 3.15 9.50 114 5400 19 25 22625 \n", "\n", "[205 rows x 14 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "numeric_cars = numeric_cars.replace('?', np.nan)\n", "numeric_cars" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "normalized-losses 41\n", "wheel-base 0\n", "length 0\n", "width 0\n", "height 0\n", "curb-weight 0\n", "bore 4\n", "stroke 4\n", "compression-rate 0\n", "horsepower 2\n", "peak-rpm 2\n", "city-mpg 0\n", "highway-mpg 0\n", "price 4\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "numeric_cars = numeric_cars.astype('float')\n", "numeric_cars.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "normalized-losses 37\n", "wheel-base 0\n", "length 0\n", "width 0\n", "height 0\n", "curb-weight 0\n", "bore 4\n", "stroke 4\n", "compression-rate 0\n", "horsepower 2\n", "peak-rpm 2\n", "city-mpg 0\n", "highway-mpg 0\n", "price 0\n", "dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Because `price` is the column we want to predict, let's remove any rows with missing `price` values.\n", "numeric_cars = numeric_cars.dropna(subset=['price'])\n", "numeric_cars.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "# Replace missing values in other columns using column means.\n", "numeric_cars = numeric_cars.fillna(numeric_cars.mean())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "normalized-losses 0\n", "wheel-base 0\n", "length 0\n", "width 0\n", "height 0\n", "curb-weight 0\n", "bore 0\n", "stroke 0\n", "compression-rate 0\n", "horsepower 0\n", "peak-rpm 0\n", "city-mpg 0\n", "highway-mpg 0\n", "price 0\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Confirm that there's no more missing values!\n", "numeric_cars.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "# Normalize all columnns to range from 0 to 1 except the target column.\n", "price_col = numeric_cars['price']\n", "numeric_cars = (numeric_cars - numeric_cars.min())/(numeric_cars.max() - numeric_cars.min())\n", "numeric_cars['price'] = price_col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Univariate Model" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "horsepower 4037.037713\n", "curb-weight 4401.118255\n", "highway-mpg 4630.026799\n", "width 4704.482590\n", "city-mpg 4766.422505\n", "length 5427.200961\n", "wheel-base 5461.553998\n", "compression-rate 6610.812153\n", "bore 6780.627785\n", "normalized-losses 7330.197653\n", "peak-rpm 7697.459696\n", "stroke 8006.529545\n", "height 8144.441043\n", "dtype: float64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.neighbors import KNeighborsRegressor\n", "from sklearn.metrics import mean_squared_error\n", "\n", "def knn_train_test(train_col, target_col, df):\n", " knn = KNeighborsRegressor()\n", " np.random.seed(1)\n", " \n", " # Randomize order of rows in data frame.\n", " shuffled_index = np.random.permutation(df.index)\n", " rand_df = df.reindex(shuffled_index)\n", "\n", " # Divide number of rows in half and round.\n", " last_train_row = int(len(rand_df) / 2)\n", " \n", " # Select the first half and set as training set.\n", " # Select the second half and set as test set.\n", " train_df = rand_df.iloc[0:last_train_row]\n", " test_df = rand_df.iloc[last_train_row:]\n", " \n", " # Fit a KNN model using default k value.\n", " knn.fit(train_df[[train_col]], train_df[target_col])\n", " \n", " # Make predictions using model.\n", " predicted_labels = knn.predict(test_df[[train_col]])\n", "\n", " # Calculate and return RMSE.\n", " mse = mean_squared_error(test_df[target_col], predicted_labels)\n", " rmse = np.sqrt(mse)\n", " return rmse\n", "\n", "rmse_results = {}\n", "train_cols = numeric_cars.columns.drop('price')\n", "\n", "# For each column (minus `price`), train a model, return RMSE value\n", "# and add to the dictionary `rmse_results`.\n", "for col in train_cols:\n", " rmse_val = knn_train_test(col, 'price', numeric_cars)\n", " rmse_results[col] = rmse_val\n", "\n", "# Create a Series object from the dictionary so \n", "# we can easily view the results, sort, etc\n", "rmse_results_series = pd.Series(rmse_results)\n", "rmse_results_series.sort_values()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{'normalized-losses': {1: 7846.750605148984,\n", " 3: 7500.5698123109905,\n", " 5: 7330.197653434445,\n", " 7: 7756.421586234123,\n", " 9: 7688.096096891432},\n", " 'wheel-base': {1: 4493.734068810494,\n", " 3: 5120.161506064513,\n", " 5: 5461.553997873057,\n", " 7: 5448.1070513823315,\n", " 9: 5738.405685192312},\n", " 'length': {1: 4628.45550121557,\n", " 3: 5129.8358210721635,\n", " 5: 5427.2009608367125,\n", " 7: 5313.427720847974,\n", " 9: 5383.054514833446},\n", " 'width': {1: 4559.257297950061,\n", " 3: 4606.413692169901,\n", " 5: 4704.482589704386,\n", " 7: 4571.485046194653,\n", " 9: 4652.914172067787},\n", " 'height': {1: 8904.04645636071,\n", " 3: 8277.609643045525,\n", " 5: 8144.441042663747,\n", " 7: 7679.598124393773,\n", " 9: 7811.03606291223},\n", " 'curb-weight': {1: 5264.290230758878,\n", " 3: 5022.318011757233,\n", " 5: 4401.118254793124,\n", " 7: 4330.608104418053,\n", " 9: 4632.044474454401},\n", " 'bore': {1: 8602.58848450066,\n", " 3: 6984.239489480916,\n", " 5: 6780.627784685976,\n", " 7: 6878.097965921532,\n", " 9: 6866.808502038413},\n", " 'stroke': {1: 9116.495955406906,\n", " 3: 7338.68466990294,\n", " 5: 8006.529544647101,\n", " 7: 7803.937796804327,\n", " 9: 7735.554366079291},\n", " 'compression-rate': {1: 8087.205346523092,\n", " 3: 7375.063685578359,\n", " 5: 6610.812153159129,\n", " 7: 6732.801282941515,\n", " 9: 7024.485525463435},\n", " 'horsepower': {1: 4170.054848037801,\n", " 3: 4020.8492630885394,\n", " 5: 4037.0377131537603,\n", " 7: 4353.811860277134,\n", " 9: 4515.135617419103},\n", " 'peak-rpm': {1: 9511.480067750124,\n", " 3: 8537.550899973421,\n", " 5: 7697.4596964334805,\n", " 7: 7510.294160083481,\n", " 9: 7340.041341263401},\n", " 'city-mpg': {1: 5901.143574354764,\n", " 3: 4646.746408727155,\n", " 5: 4766.422505090134,\n", " 7: 5232.523034167316,\n", " 9: 5465.209492527533},\n", " 'highway-mpg': {1: 6025.594966720739,\n", " 3: 4617.305019788554,\n", " 5: 4630.026798588056,\n", " 7: 4796.061440186946,\n", " 9: 5278.358056953987}}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def knn_train_test(train_col, target_col, df):\n", " np.random.seed(1)\n", " \n", " # Randomize order of rows in data frame.\n", " shuffled_index = np.random.permutation(df.index)\n", " rand_df = df.reindex(shuffled_index)\n", "\n", " # Divide number of rows in half and round.\n", " last_train_row = int(len(rand_df) / 2)\n", " \n", " # Select the first half and set as training set.\n", " # Select the second half and set as test set.\n", " train_df = rand_df.iloc[0:last_train_row]\n", " test_df = rand_df.iloc[last_train_row:]\n", " \n", " k_values = [1,3,5,7,9]\n", " k_rmses = {}\n", " \n", " for k in k_values:\n", " # Fit model using k nearest neighbors.\n", " knn = KNeighborsRegressor(n_neighbors=k)\n", " knn.fit(train_df[[train_col]], train_df[target_col])\n", "\n", " # Make predictions using model.\n", " predicted_labels = knn.predict(test_df[[train_col]])\n", "\n", " # Calculate and return RMSE.\n", " mse = mean_squared_error(test_df[target_col], predicted_labels)\n", " rmse = np.sqrt(mse)\n", " \n", " k_rmses[k] = rmse\n", " return k_rmses\n", "\n", "k_rmse_results = {}\n", "\n", "# For each column (minus `price`), train a model, return RMSE value\n", "# and add to the dictionary `rmse_results`.\n", "train_cols = numeric_cars.columns.drop('price')\n", "for col in train_cols:\n", " rmse_val = knn_train_test(col, 'price', numeric_cars)\n", " k_rmse_results[col] = rmse_val\n", "\n", "k_rmse_results" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "for k,v in k_rmse_results.items():\n", " x = list(v.keys())\n", " y = list(v.values())\n", " \n", " plt.plot(x,y)\n", " plt.xlabel('k value')\n", " plt.ylabel('RMSE')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Multivariate Model" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "horsepower 4219.377860\n", "width 4618.910560\n", "curb-weight 4730.075815\n", "highway-mpg 5069.469256\n", "length 5176.394904\n", "city-mpg 5202.409003\n", "wheel-base 5252.392462\n", "compression-rate 7166.073599\n", "bore 7222.472445\n", "normalized-losses 7624.407151\n", "stroke 8000.240467\n", "peak-rpm 8119.365233\n", "height 8163.346266\n", "dtype: float64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Compute average RMSE across different `k` values for each feature.\n", "feature_avg_rmse = {}\n", "for k,v in k_rmse_results.items():\n", " avg_rmse = np.mean(list(v.values()))\n", " feature_avg_rmse[k] = avg_rmse\n", "series_avg_rmse = pd.Series(feature_avg_rmse)\n", "series_avg_rmse.sort_values()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'two best features': {5: 3589.3132622073304},\n", " 'three best features': {5: 3305.9401397969677},\n", " 'four best features': {5: 3280.3807311630976},\n", " 'five best features': {5: 3341.6024539726504},\n", " 'six best features': {5: 3628.261188214127}}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def knn_train_test(train_cols, target_col, df):\n", " np.random.seed(1)\n", " \n", " # Randomize order of rows in data frame.\n", " shuffled_index = np.random.permutation(df.index)\n", " rand_df = df.reindex(shuffled_index)\n", "\n", " # Divide number of rows in half and round.\n", " last_train_row = int(len(rand_df) / 2)\n", " \n", " # Select the first half and set as training set.\n", " # Select the second half and set as test set.\n", " train_df = rand_df.iloc[0:last_train_row]\n", " test_df = rand_df.iloc[last_train_row:]\n", " \n", " k_values = [5]\n", " k_rmses = {}\n", " \n", " for k in k_values:\n", " # Fit model using k nearest neighbors.\n", " knn = KNeighborsRegressor(n_neighbors=k)\n", " knn.fit(train_df[train_cols], train_df[target_col])\n", "\n", " # Make predictions using model.\n", " predicted_labels = knn.predict(test_df[train_cols])\n", "\n", " # Calculate and return RMSE.\n", " mse = mean_squared_error(test_df[target_col], predicted_labels)\n", " rmse = np.sqrt(mse)\n", " \n", " k_rmses[k] = rmse\n", " return k_rmses\n", "\n", "k_rmse_results = {}\n", "\n", "two_best_features = ['horsepower', 'width']\n", "rmse_val = knn_train_test(two_best_features, 'price', numeric_cars)\n", "k_rmse_results[\"two best features\"] = rmse_val\n", "\n", "three_best_features = ['horsepower', 'width', 'curb-weight']\n", "rmse_val = knn_train_test(three_best_features, 'price', numeric_cars)\n", "k_rmse_results[\"three best features\"] = rmse_val\n", "\n", "four_best_features = ['horsepower', 'width', 'curb-weight', 'city-mpg']\n", "rmse_val = knn_train_test(four_best_features, 'price', numeric_cars)\n", "k_rmse_results[\"four best features\"] = rmse_val\n", "\n", "five_best_features = ['horsepower', 'width', 'curb-weight' , 'city-mpg' , 'highway-mpg']\n", "rmse_val = knn_train_test(five_best_features, 'price', numeric_cars)\n", "k_rmse_results[\"five best features\"] = rmse_val\n", "\n", "six_best_features = ['horsepower', 'width', 'curb-weight' , 'city-mpg' , 'highway-mpg', 'length']\n", "rmse_val = knn_train_test(six_best_features, 'price', numeric_cars)\n", "k_rmse_results[\"six best features\"] = rmse_val\n", "\n", "k_rmse_results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Multivariate Model" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{'three best features': {1: 3013.0109985241875,\n", " 2: 2813.285969825997,\n", " 3: 3171.585284478674,\n", " 4: 3182.3137417981943,\n", " 5: 3305.9401397969677,\n", " 6: 3522.506848900376,\n", " 7: 3774.3772094554106,\n", " 8: 3978.969124021116,\n", " 9: 3992.923680588881,\n", " 10: 4076.2381473803043,\n", " 11: 4156.388331131807,\n", " 12: 4201.10713385948,\n", " 13: 4303.62676861325,\n", " 14: 4359.693296989702,\n", " 15: 4371.771103372868,\n", " 16: 4394.4846551644205,\n", " 17: 4510.399710057406,\n", " 18: 4584.310961865486,\n", " 19: 4636.62620477063,\n", " 20: 4664.465847866811,\n", " 21: 4724.096637428273,\n", " 22: 4752.535484102914,\n", " 23: 4808.703310452101,\n", " 24: 4858.9452710176065},\n", " 'four best features': {1: 2516.589279950226,\n", " 2: 2811.1727257604443,\n", " 3: 3013.3615157930335,\n", " 4: 3126.269482641261,\n", " 5: 3280.3807311630976,\n", " 6: 3479.5099401018138,\n", " 7: 3781.5612742414464,\n", " 8: 3988.8669577450623,\n", " 9: 4024.998621362245,\n", " 10: 4042.247226187899,\n", " 11: 4230.335248684068,\n", " 12: 4354.008675154061,\n", " 13: 4461.626664877788,\n", " 14: 4468.029845088927,\n", " 15: 4444.164407677133,\n", " 16: 4492.069868571789,\n", " 17: 4597.262222692034,\n", " 18: 4615.446316768325,\n", " 19: 4692.5688728042505,\n", " 20: 4727.815985112382,\n", " 21: 4738.233067652869,\n", " 22: 4778.176687231466,\n", " 23: 4811.556798910073,\n", " 24: 4846.548529789929},\n", " 'five best features': {1: 2530.0554077602005,\n", " 2: 2897.1757974767684,\n", " 3: 3183.4597426217424,\n", " 4: 3168.0230415758415,\n", " 5: 3341.6024539726504,\n", " 6: 3537.520542998191,\n", " 7: 3817.290452118825,\n", " 8: 4004.414529685573,\n", " 9: 3992.3186041830318,\n", " 10: 4138.728787853335,\n", " 11: 4342.052852829098,\n", " 12: 4416.145381031136,\n", " 13: 4493.8798141857205,\n", " 14: 4463.501601554571,\n", " 15: 4474.480783207805,\n", " 16: 4553.5595362768345,\n", " 17: 4597.001081375769,\n", " 18: 4655.283442648355,\n", " 19: 4687.900893982822,\n", " 20: 4712.022985543165,\n", " 21: 4755.856625823773,\n", " 22: 4788.162720030673,\n", " 23: 4806.923381965054,\n", " 24: 4879.983352195467}}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def knn_train_test(train_cols, target_col, df):\n", " np.random.seed(1)\n", " \n", " # Randomize order of rows in data frame.\n", " shuffled_index = np.random.permutation(df.index)\n", " rand_df = df.reindex(shuffled_index)\n", "\n", " # Divide number of rows in half and round.\n", " last_train_row = int(len(rand_df) / 2)\n", " \n", " # Select the first half and set as training set.\n", " # Select the second half and set as test set.\n", " train_df = rand_df.iloc[0:last_train_row]\n", " test_df = rand_df.iloc[last_train_row:]\n", " \n", " k_values = [i for i in range(1, 25)]\n", " k_rmses = {}\n", " \n", " for k in k_values:\n", " # Fit model using k nearest neighbors.\n", " knn = KNeighborsRegressor(n_neighbors=k)\n", " knn.fit(train_df[train_cols], train_df[target_col])\n", "\n", " # Make predictions using model.\n", " predicted_labels = knn.predict(test_df[train_cols])\n", "\n", " # Calculate and return RMSE.\n", " mse = mean_squared_error(test_df[target_col], predicted_labels)\n", " rmse = np.sqrt(mse)\n", " \n", " k_rmses[k] = rmse\n", " return k_rmses\n", "\n", "k_rmse_results = {}\n", "\n", "three_best_features = ['horsepower', 'width', 'curb-weight']\n", "rmse_val = knn_train_test(three_best_features, 'price', numeric_cars)\n", "k_rmse_results[\"three best features\"] = rmse_val\n", "\n", "four_best_features = ['horsepower', 'width', 'curb-weight', 'city-mpg']\n", "rmse_val = knn_train_test(four_best_features, 'price', numeric_cars)\n", "k_rmse_results[\"four best features\"] = rmse_val\n", "\n", "five_best_features = ['horsepower', 'width', 'curb-weight' , 'city-mpg' , 'highway-mpg']\n", "rmse_val = knn_train_test(five_best_features, 'price', numeric_cars)\n", "k_rmse_results[\"five best features\"] = rmse_val\n", "\n", "k_rmse_results" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "for k,v in k_rmse_results.items():\n", " x = list(v.keys())\n", " y = list(v.values())\n", " \n", " plt.plot(x,y)\n", " plt.xlabel('k value')\n", " plt.ylabel('RMSE')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }