|
@@ -0,0 +1,158 @@
|
|
|
|
+{
|
|
|
|
+ "cells": [
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
+ "metadata": {
|
|
|
|
+ "collapsed": true
|
|
|
|
+ },
|
|
|
|
+ "source": [
|
|
|
|
+ "# Introduction to the data"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "collapsed": true
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "import pandas as pd\n",
|
|
|
|
+ "df = pd.read_csv(\"academy_awards.csv\", encoding=\"ISO-8859-1\")\n",
|
|
|
|
+ "df"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "source": [
|
|
|
|
+ "# Filtering the data"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "collapsed": true
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "df[\"Year\"] = df[\"Year\"].str[0:4]\n",
|
|
|
|
+ "df[\"Year\"] = df[\"Year\"].astype(\"int64\")\n",
|
|
|
|
+ "later_than_2000 = df[df[\"Year\"] > 2000]\n",
|
|
|
|
+ "award_categories = [\"Actor -- Leading Role\",\"Actor -- Supporting Role\", \"Actress -- Leading Role\", \"Actress -- Supporting Role\"]\n",
|
|
|
|
+ "nominations = later_than_2000[later_than_2000[\"Category\"].isin(award_categories)]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "source": [
|
|
|
|
+ "# Cleaning up the Won? and Unnamed columns"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "collapsed": true
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "replacements = { \"NO\": 0, \"YES\": 1 }\n",
|
|
|
|
+ "nominations[\"Won?\"] = nominations[\"Won?\"].map(replacements)\n",
|
|
|
|
+ "nominations[\"Won\"] = nominations[\"Won?\"]\n",
|
|
|
|
+ "drop_cols = [\"Won?\",\"Unnamed: 5\", \"Unnamed: 6\",\"Unnamed: 7\", \"Unnamed: 8\", \"Unnamed: 9\", \"Unnamed: 10\"]\n",
|
|
|
|
+ "final_nominations = nominations.drop(drop_cols, axis=1)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "source": [
|
|
|
|
+ "# Cleaning up the Additional Info column"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "collapsed": true
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "additional_info_one = final_nominations[\"Additional Info\"].str.rstrip(\"'}\")\n",
|
|
|
|
+ "additional_info_two = additional_info_one.str.split(\" {'\")\n",
|
|
|
|
+ "movie_names = additional_info_two.str[0]\n",
|
|
|
|
+ "characters = additional_info_two.str[1]\n",
|
|
|
|
+ "final_nominations[\"Movie\"] = movie_names\n",
|
|
|
|
+ "final_nominations[\"Character\"] = characters\n",
|
|
|
|
+ "final_nominations = final_nominations.drop(\"Additional Info\", axis=1)\n",
|
|
|
|
+ "final_nominations"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "source": [
|
|
|
|
+ "# Exporting to SQLite"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "collapsed": true
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "import sqlite3\n",
|
|
|
|
+ "conn = sqlite3.connect(\"nominations.db\")\n",
|
|
|
|
+ "final_nominations.to_sql(\"nominations\", conn, index=False)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "source": [
|
|
|
|
+ "# Verifying in SQL"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "collapsed": true
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "query_one = \"pragma table_info(nominations);\"\n",
|
|
|
|
+ "query_two = \"select * from nominations limit 10;\"\n",
|
|
|
|
+ "print(conn.execute(query_one).fetchall())\n",
|
|
|
|
+ "print(conn.execute(query_two).fetchall())\n",
|
|
|
|
+ "conn.close()"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "metadata": {
|
|
|
|
+ "kernelspec": {
|
|
|
|
+ "display_name": "Python 3",
|
|
|
|
+ "language": "python",
|
|
|
|
+ "name": "python3"
|
|
|
|
+ },
|
|
|
|
+ "language_info": {
|
|
|
|
+ "codemirror_mode": {
|
|
|
|
+ "name": "ipython",
|
|
|
|
+ "version": 3
|
|
|
|
+ },
|
|
|
|
+ "file_extension": ".py",
|
|
|
|
+ "mimetype": "text/x-python",
|
|
|
|
+ "name": "python",
|
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
|
+ "version": "3.5.0"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "nbformat": 4,
|
|
|
|
+ "nbformat_minor": 0
|
|
|
|
+}
|