Queer European MD passionate about IT
Srini Kadamati 9 năm trước cách đây
mục cha
commit
2a9cd1cc2e
1 tập tin đã thay đổi với 158 bổ sung0 xóa
  1. 158 0
      Mission215Solutions.ipynb

+ 158 - 0
Mission215Solutions.ipynb

@@ -0,0 +1,158 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": true
+   },
+   "source": [
+    "# Introduction to the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Load the Academy Awards CSV, decoding it as ISO-8859-1.\n",
+    "import pandas as pd\n",
+    "df = pd.read_csv(\"academy_awards.csv\", encoding=\"ISO-8859-1\")\n",
+    "# Preview only the first rows instead of rendering the whole frame.\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Filtering the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Keep the first four characters of Year and store them as int64.\n",
+    "# Casting to str first keeps this cell re-runnable after Year has already been converted to int64.\n",
+    "df[\"Year\"] = df[\"Year\"].astype(str).str[0:4].astype(\"int64\")\n",
+    "later_than_2000 = df[df[\"Year\"] > 2000]\n",
+    "award_categories = [\"Actor -- Leading Role\",\"Actor -- Supporting Role\", \"Actress -- Leading Role\", \"Actress -- Supporting Role\"]\n",
+    "# .copy() yields an independent frame, so assigning columns to nominations later does not raise SettingWithCopyWarning.\n",
+    "nominations = later_than_2000[later_than_2000[\"Category\"].isin(award_categories)].copy()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Cleaning up the Won? and Unnamed columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Encode the Won? flag as 0/1 in a new Won column, then drop Won? and the unnamed columns.\n",
+    "# assign() returns a new frame, so nominations is left unmodified and this cell can be re-run without re-mapping already-converted values.\n",
+    "replacements = { \"NO\": 0, \"YES\": 1 }\n",
+    "drop_cols = [\"Won?\",\"Unnamed: 5\", \"Unnamed: 6\",\"Unnamed: 7\", \"Unnamed: 8\", \"Unnamed: 9\", \"Unnamed: 10\"]\n",
+    "won_flags = nominations[\"Won?\"].map(replacements)\n",
+    "final_nominations = nominations.assign(Won=won_flags).drop(drop_cols, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Cleaning up the Additional Info column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Strip trailing ' and } characters from Additional Info, then split each value on the \" {'\" separator.\n",
+    "info_parts = final_nominations[\"Additional Info\"].str.rstrip(\"'}\").str.split(\" {'\")\n",
+    "# The first piece becomes Movie, the second becomes Character.\n",
+    "final_nominations[\"Movie\"] = info_parts.str[0]\n",
+    "final_nominations[\"Character\"] = info_parts.str[1]\n",
+    "# The original combined column is no longer needed once it has been split.\n",
+    "final_nominations = final_nominations.drop(\"Additional Info\", axis=1)\n",
+    "final_nominations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Exporting to SQLite"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Write the cleaned nominations frame to a table in a SQLite database file.\n",
+    "import sqlite3\n",
+    "conn = sqlite3.connect(\"nominations.db\")\n",
+    "# if_exists=\"replace\" keeps the notebook re-runnable: the default (\"fail\") raises ValueError when the table already exists in nominations.db.\n",
+    "final_nominations.to_sql(\"nominations\", conn, index=False, if_exists=\"replace\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Verifying in SQL"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Print the table schema and the first ten rows of the nominations table.\n",
+    "query_one = \"pragma table_info(nominations);\"\n",
+    "query_two = \"select * from nominations limit 10;\"\n",
+    "# try/finally guarantees the connection is closed even if one of the queries raises.\n",
+    "try:\n",
+    "    print(conn.execute(query_one).fetchall())\n",
+    "    print(conn.execute(query_two).fetchall())\n",
+    "finally:\n",
+    "    conn.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}