{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Introduction to the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv(\"academy_awards.csv\", encoding=\"ISO-8859-1\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Filtering the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df[\"Year\"] = df[\"Year\"].str[0:4]\n",
    "df[\"Year\"] = df[\"Year\"].astype(\"int64\")\n",
    "later_than_2000 = df[df[\"Year\"] > 2000]\n",
    "award_categories = [\"Actor -- Leading Role\",\"Actor -- Supporting Role\", \"Actress -- Leading Role\", \"Actress -- Supporting Role\"]\n",
    "nominations = later_than_2000[later_than_2000[\"Category\"].isin(award_categories)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cleaning up the Won? and Unnamed columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "replacements = { \"NO\": 0, \"YES\": 1 }\n",
    "nominations[\"Won?\"] = nominations[\"Won?\"].map(replacements)\n",
    "nominations[\"Won\"] = nominations[\"Won?\"]\n",
    "drop_cols = [\"Won?\",\"Unnamed: 5\", \"Unnamed: 6\",\"Unnamed: 7\", \"Unnamed: 8\", \"Unnamed: 9\", \"Unnamed: 10\"]\n",
    "final_nominations = nominations.drop(drop_cols, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cleaning up the Additional Info column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "additional_info_one = final_nominations[\"Additional Info\"].str.rstrip(\"'}\")\n",
    "additional_info_two = additional_info_one.str.split(\" {'\")\n",
    "movie_names = additional_info_two.str[0]\n",
    "characters = additional_info_two.str[1]\n",
    "final_nominations[\"Movie\"] = movie_names\n",
    "final_nominations[\"Character\"] = characters\n",
    "final_nominations = final_nominations.drop(\"Additional Info\", axis=1)\n",
    "final_nominations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exporting to SQLite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import sqlite3\n",
    "conn = sqlite3.connect(\"nominations.db\")\n",
    "final_nominations.to_sql(\"nominations\", conn, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Verifying in SQL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "query_one = \"pragma table_info(nominations);\"\n",
    "query_two = \"select * from nominations limit 10;\"\n",
    "print(conn.execute(query_one).fetchall())\n",
    "print(conn.execute(query_two).fetchall())\n",
    "conn.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}