Queer European MD passionate about IT
Browse Source

Merge pull request #18 from dataquestio/fix/srini/fixreadme

added mission207 to readme.md
Srini Kadamati 8 years ago
parent
commit
eb2ec13605
2 changed files with 457 additions and 0 deletions
  1. 455 0
      Mission9Solutions.ipynb
  2. 2 0
      README.md

+ 455 - 0
Mission9Solutions.ipynb

@@ -0,0 +1,455 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "# Introduction To The Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Use a context manager so the file handle is closed once the data is read.\n",
+    "with open(\"US_births_1994-2003_CDC_NCHS.csv\") as csv_file:\n",
+    "    csv_list = csv_file.read().split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 137,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['year,month,date_of_month,day_of_week,births',\n",
+       " '1994,1,1,6,8096',\n",
+       " '1994,1,2,7,7772',\n",
+       " '1994,1,3,1,10142',\n",
+       " '1994,1,4,2,11248',\n",
+       " '1994,1,5,3,11053',\n",
+       " '1994,1,6,4,11406',\n",
+       " '1994,1,7,5,11251',\n",
+       " '1994,1,8,6,8653',\n",
+       " '1994,1,9,7,7910']"
+      ]
+     },
+     "execution_count": 137,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "csv_list[0:10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Converting Data Into A List Of Lists"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def read_csv(filename):\n",
+    "    \"\"\"Read a CSV file of integers into a list of lists.\n",
+    "\n",
+    "    The first line of the file is treated as a header and skipped.\n",
+    "    Blank lines (such as the empty string left behind by a trailing\n",
+    "    newline) are ignored instead of crashing on int('').\n",
+    "\n",
+    "    Args:\n",
+    "        filename: path of the CSV file to read.\n",
+    "\n",
+    "    Returns:\n",
+    "        A list containing one list of ints per data row.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if a field cannot be converted with int().\n",
+    "    \"\"\"\n",
+    "    # The context manager closes the file even if reading fails.\n",
+    "    with open(filename) as csv_file:\n",
+    "        string_list = csv_file.read().split(\"\\n\")[1:]\n",
+    "\n",
+    "    final_list = []\n",
+    "    for row in string_list:\n",
+    "        if not row.strip():\n",
+    "            continue\n",
+    "        final_list.append([int(value) for value in row.split(\",\")])\n",
+    "    return final_list\n",
+    "        \n",
+    "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 139,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[[1994, 1, 1, 6, 8096],\n",
+       " [1994, 1, 2, 7, 7772],\n",
+       " [1994, 1, 3, 1, 10142],\n",
+       " [1994, 1, 4, 2, 11248],\n",
+       " [1994, 1, 5, 3, 11053],\n",
+       " [1994, 1, 6, 4, 11406],\n",
+       " [1994, 1, 7, 5, 11251],\n",
+       " [1994, 1, 8, 6, 8653],\n",
+       " [1994, 1, 9, 7, 7910],\n",
+       " [1994, 1, 10, 1, 10498]]"
+      ]
+     },
+     "execution_count": 139,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_list[0:10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Calculating Number Of Births Each Month"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def read_csv(filename):\n",
+    "    \"\"\"Read a CSV file of integers into a list of lists.\n",
+    "\n",
+    "    Note: a function with this name is also defined in an earlier cell\n",
+    "    of this notebook; running this cell rebinds the name.\n",
+    "\n",
+    "    The first line of the file is treated as a header and skipped.\n",
+    "    Blank lines (such as the empty string left behind by a trailing\n",
+    "    newline) are ignored instead of crashing on int('').\n",
+    "\n",
+    "    Args:\n",
+    "        filename: path of the CSV file to read.\n",
+    "\n",
+    "    Returns:\n",
+    "        A list containing one list of ints per data row.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if a field cannot be converted with int().\n",
+    "    \"\"\"\n",
+    "    # The context manager closes the file even if reading fails.\n",
+    "    with open(filename) as csv_file:\n",
+    "        string_list = csv_file.read().split(\"\\n\")[1:]\n",
+    "\n",
+    "    final_list = []\n",
+    "    for row in string_list:\n",
+    "        if not row.strip():\n",
+    "            continue\n",
+    "        final_list.append([int(value) for value in row.split(\",\")])\n",
+    "    return final_list\n",
+    "        \n",
+    "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")\n",
+    "\n",
+    "\n",
+    "def month_births(data):\n",
+    "    \"\"\"Total the births recorded for each month.\n",
+    "\n",
+    "    Args:\n",
+    "        data: iterable of rows in which row[1] is the month and\n",
+    "            row[4] is the number of births.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict mapping each month value to the sum of its births.\n",
+    "    \"\"\"\n",
+    "    totals = {}\n",
+    "    for record in data:\n",
+    "        key, count = record[1], record[4]\n",
+    "        # First sighting of this month: seed the running total.\n",
+    "        if key not in totals:\n",
+    "            totals[key] = count\n",
+    "            continue\n",
+    "        totals[key] = totals[key] + count\n",
+    "    return totals\n",
+    "    \n",
+    "cdc_month_births = month_births(cdc_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 141,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 3232517,\n",
+       " 2: 3018140,\n",
+       " 3: 3322069,\n",
+       " 4: 3185314,\n",
+       " 5: 3350907,\n",
+       " 6: 3296530,\n",
+       " 7: 3498783,\n",
+       " 8: 3525858,\n",
+       " 9: 3439698,\n",
+       " 10: 3378814,\n",
+       " 11: 3171647,\n",
+       " 12: 3301860}"
+      ]
+     },
+     "execution_count": 141,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_month_births"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Calculating Number Of Births Each Day Of Week"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 142,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def dow_births(data):\n",
+    "    \"\"\"Total the births recorded for each day of the week.\n",
+    "\n",
+    "    Args:\n",
+    "        data: iterable of rows in which row[3] is the day of the week\n",
+    "            and row[4] is the number of births.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict mapping each day-of-week value to the sum of its births.\n",
+    "    \"\"\"\n",
+    "    per_day = {}\n",
+    "    for entry in data:\n",
+    "        weekday = entry[3]\n",
+    "        born = entry[4]\n",
+    "        try:\n",
+    "            per_day[weekday] = per_day[weekday] + born\n",
+    "        except KeyError:\n",
+    "            # Day not seen yet: start its total with this row.\n",
+    "            per_day[weekday] = born\n",
+    "    return per_day\n",
+    "    \n",
+    "cdc_dow_births = dow_births(cdc_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 143,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 5789166,\n",
+       " 2: 6446196,\n",
+       " 3: 6322855,\n",
+       " 4: 6288429,\n",
+       " 5: 6233657,\n",
+       " 6: 4562111,\n",
+       " 7: 4079723}"
+      ]
+     },
+     "execution_count": 143,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dow_births"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Creating A More General Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 144,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def calc_counts(data, column, value_column=4):\n",
+    "    \"\"\"Sum a value column, grouped by the values of another column.\n",
+    "\n",
+    "    Despite the name, this adds up row[value_column] for each group;\n",
+    "    it does not count rows.\n",
+    "\n",
+    "    Args:\n",
+    "        data: iterable of rows (indexable sequences).\n",
+    "        column: index of the column whose values define the groups.\n",
+    "        value_column: index of the column to sum. Defaults to 4, the\n",
+    "            births column used throughout this notebook.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict mapping each distinct row[column] value to the sum of\n",
+    "        row[value_column] over the rows in that group.\n",
+    "    \"\"\"\n",
+    "    sums_dict = {}\n",
+    "    for row in data:\n",
+    "        col_value = row[column]\n",
+    "        # dict.get supplies 0 the first time a group is seen.\n",
+    "        sums_dict[col_value] = sums_dict.get(col_value, 0) + row[value_column]\n",
+    "    return sums_dict\n",
+    "\n",
+    "cdc_year_births = calc_counts(cdc_list, 0)\n",
+    "cdc_month_births = calc_counts(cdc_list, 1)\n",
+    "cdc_dom_births = calc_counts(cdc_list, 2)\n",
+    "cdc_dow_births = calc_counts(cdc_list, 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 145,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1994: 3952767,\n",
+       " 1995: 3899589,\n",
+       " 1996: 3891494,\n",
+       " 1997: 3880894,\n",
+       " 1998: 3941553,\n",
+       " 1999: 3959417,\n",
+       " 2000: 4058814,\n",
+       " 2001: 4025933,\n",
+       " 2002: 4021726,\n",
+       " 2003: 4089950}"
+      ]
+     },
+     "execution_count": 145,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_year_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 146,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 3232517,\n",
+       " 2: 3018140,\n",
+       " 3: 3322069,\n",
+       " 4: 3185314,\n",
+       " 5: 3350907,\n",
+       " 6: 3296530,\n",
+       " 7: 3498783,\n",
+       " 8: 3525858,\n",
+       " 9: 3439698,\n",
+       " 10: 3378814,\n",
+       " 11: 3171647,\n",
+       " 12: 3301860}"
+      ]
+     },
+     "execution_count": 146,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_month_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 147,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 1276557,\n",
+       " 2: 1288739,\n",
+       " 3: 1304499,\n",
+       " 4: 1288154,\n",
+       " 5: 1299953,\n",
+       " 6: 1304474,\n",
+       " 7: 1310459,\n",
+       " 8: 1312297,\n",
+       " 9: 1303292,\n",
+       " 10: 1320764,\n",
+       " 11: 1314361,\n",
+       " 12: 1318437,\n",
+       " 13: 1277684,\n",
+       " 14: 1320153,\n",
+       " 15: 1319171,\n",
+       " 16: 1315192,\n",
+       " 17: 1324953,\n",
+       " 18: 1326855,\n",
+       " 19: 1318727,\n",
+       " 20: 1324821,\n",
+       " 21: 1322897,\n",
+       " 22: 1317381,\n",
+       " 23: 1293290,\n",
+       " 24: 1288083,\n",
+       " 25: 1272116,\n",
+       " 26: 1284796,\n",
+       " 27: 1294395,\n",
+       " 28: 1307685,\n",
+       " 29: 1223161,\n",
+       " 30: 1202095,\n",
+       " 31: 746696}"
+      ]
+     },
+     "execution_count": 147,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dom_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 5789166,\n",
+       " 2: 6446196,\n",
+       " 3: 6322855,\n",
+       " 4: 6288429,\n",
+       " 5: 6233657,\n",
+       " 6: 4562111,\n",
+       " 7: 4079723}"
+      ]
+     },
+     "execution_count": 148,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dow_births"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python [conda env:envdq]",
+   "language": "python",
+   "name": "conda-env-envdq-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

+ 2 - 0
README.md

@@ -4,10 +4,12 @@ This repository is a series of notebooks that show solutions for the [projects](
 
 Of course, there are always going to be multiple ways to solve any one problem, so these notebooks just show one possible solution.
 
+- [Guided Project: Explore U.S. Births](https://github.com/dataquestio/solutions/blob/master/Mission9Solutions.ipynb)
 - [Guided Project: Customizing Data Visualizations](https://github.com/dataquestio/solutions/blob/master/Mission103Solutions.ipynb)
 - [Guided Project: Star Wars survey](https://github.com/dataquestio/solutions/blob/master/Mission201Solution.ipynb)
 - [Guided Project: Police killings](https://github.com/dataquestio/solutions/blob/master/Mission202Solution.ipynb)
 - [Guided Project: Visualizing Pixar's Roller Coaster](https://github.com/dataquestio/solutions/blob/master/Mission205Solutions.ipynb)
+- [Guided Project: Using Jupyter Notebook](https://github.com/dataquestio/solutions/blob/master/Mission207Solutions.ipynb)
 - [Guided Project: Analyzing movie reviews](https://github.com/dataquestio/solutions/blob/master/Mission209Solution.ipynb)
 - [Guided Project: Winning Jeopardy](https://github.com/dataquestio/solutions/blob/master/Mission210Solution.ipynb)
 - [Guided Project: Predicting board game reviews](https://github.com/dataquestio/solutions/blob/master/Mission211Solution.ipynb)