Queer European MD passionate about IT
Browse Source

Merge pull request #18 from dataquestio/fix/srini/fixreadme

added mission207 to readme.md
Srini Kadamati 8 years ago
parent
commit
eb2ec13605
2 changed files with 457 additions and 0 deletions
  1. 455 0
      Mission9Solutions.ipynb
  2. 2 0
      README.md

+ 455 - 0
Mission9Solutions.ipynb

@@ -0,0 +1,455 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "# Introduction To The Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Read through a context manager so the file handle is closed right away\n",
+    "# instead of being left open until garbage collection.\n",
+    "with open(\"US_births_1994-2003_CDC_NCHS.csv\") as csv_file:\n",
+    "    csv_list = csv_file.read().split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 137,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['year,month,date_of_month,day_of_week,births',\n",
+       " '1994,1,1,6,8096',\n",
+       " '1994,1,2,7,7772',\n",
+       " '1994,1,3,1,10142',\n",
+       " '1994,1,4,2,11248',\n",
+       " '1994,1,5,3,11053',\n",
+       " '1994,1,6,4,11406',\n",
+       " '1994,1,7,5,11251',\n",
+       " '1994,1,8,6,8653',\n",
+       " '1994,1,9,7,7910']"
+      ]
+     },
+     "execution_count": 137,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "csv_list[0:10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Converting Data Into A List Of Lists"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def read_csv(filename):\n",
+    "    \"\"\"Read a CSV file of integers into a list of rows.\n",
+    "\n",
+    "    The first line is treated as a header and dropped. Every remaining\n",
+    "    non-blank line is split on commas and each field is converted with int().\n",
+    "\n",
+    "    Returns a list of lists of ints, one inner list per data row.\n",
+    "    Raises ValueError if a field is not a valid integer.\n",
+    "    \"\"\"\n",
+    "    # The context manager closes the file even if reading fails.\n",
+    "    with open(filename) as csv_file:\n",
+    "        string_list = csv_file.read().split(\"\\n\")[1:]\n",
+    "\n",
+    "    final_list = []\n",
+    "    for row in string_list:\n",
+    "        # A file ending in a newline yields an empty last row; calling int()\n",
+    "        # on an empty string would raise, so blank rows are skipped.\n",
+    "        if not row.strip():\n",
+    "            continue\n",
+    "        final_list.append([int(value) for value in row.split(\",\")])\n",
+    "    return final_list\n",
+    "        \n",
+    "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 139,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[[1994, 1, 1, 6, 8096],\n",
+       " [1994, 1, 2, 7, 7772],\n",
+       " [1994, 1, 3, 1, 10142],\n",
+       " [1994, 1, 4, 2, 11248],\n",
+       " [1994, 1, 5, 3, 11053],\n",
+       " [1994, 1, 6, 4, 11406],\n",
+       " [1994, 1, 7, 5, 11251],\n",
+       " [1994, 1, 8, 6, 8653],\n",
+       " [1994, 1, 9, 7, 7910],\n",
+       " [1994, 1, 10, 1, 10498]]"
+      ]
+     },
+     "execution_count": 139,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_list[0:10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Calculating Number Of Births Each Month"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def read_csv(filename):\n",
+    "    \"\"\"Read a CSV file of integers into a list of rows.\n",
+    "\n",
+    "    The first line is treated as a header and dropped. Every remaining\n",
+    "    non-blank line is split on commas and each field is converted with int().\n",
+    "\n",
+    "    Returns a list of lists of ints, one inner list per data row.\n",
+    "    Raises ValueError if a field is not a valid integer.\n",
+    "    \"\"\"\n",
+    "    # The context manager closes the file even if reading fails.\n",
+    "    with open(filename) as csv_file:\n",
+    "        string_list = csv_file.read().split(\"\\n\")[1:]\n",
+    "\n",
+    "    final_list = []\n",
+    "    for row in string_list:\n",
+    "        # A file ending in a newline yields an empty last row; calling int()\n",
+    "        # on an empty string would raise, so blank rows are skipped.\n",
+    "        if not row.strip():\n",
+    "            continue\n",
+    "        final_list.append([int(value) for value in row.split(\",\")])\n",
+    "    return final_list\n",
+    "        \n",
+    "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")\n",
+    "\n",
+    "\n",
+    "def month_births(data):\n",
+    "    \"\"\"Total the births (index 4) of every row, grouped by month (index 1).\n",
+    "\n",
+    "    Returns a dict mapping each month value found in data to the sum of\n",
+    "    births over all rows carrying that month.\n",
+    "    \"\"\"\n",
+    "    totals = {}\n",
+    "\n",
+    "    for record in data:\n",
+    "        key, count = record[1], record[4]\n",
+    "        if key not in totals:\n",
+    "            # The first row seen for a month starts its running total.\n",
+    "            totals[key] = count\n",
+    "            continue\n",
+    "        totals[key] = totals[key] + count\n",
+    "    return totals\n",
+    "    \n",
+    "cdc_month_births = month_births(cdc_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 141,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 3232517,\n",
+       " 2: 3018140,\n",
+       " 3: 3322069,\n",
+       " 4: 3185314,\n",
+       " 5: 3350907,\n",
+       " 6: 3296530,\n",
+       " 7: 3498783,\n",
+       " 8: 3525858,\n",
+       " 9: 3439698,\n",
+       " 10: 3378814,\n",
+       " 11: 3171647,\n",
+       " 12: 3301860}"
+      ]
+     },
+     "execution_count": 141,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_month_births"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Calculating Number Of Births Each Day Of Week"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 142,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def dow_births(data):\n",
+    "    \"\"\"Total the births (index 4) of every row, grouped by day of week (index 3).\n",
+    "\n",
+    "    Returns a dict mapping each day-of-week value found in data to the sum\n",
+    "    of births over all rows carrying that value.\n",
+    "    \"\"\"\n",
+    "    totals = {}\n",
+    "\n",
+    "    for record in data:\n",
+    "        weekday = record[3]\n",
+    "        count = record[4]\n",
+    "        # Extend the running total if one exists, otherwise start it.\n",
+    "        totals[weekday] = totals[weekday] + count if weekday in totals else count\n",
+    "    return totals\n",
+    "    \n",
+    "cdc_dow_births = dow_births(cdc_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 143,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 5789166,\n",
+       " 2: 6446196,\n",
+       " 3: 6322855,\n",
+       " 4: 6288429,\n",
+       " 5: 6233657,\n",
+       " 6: 4562111,\n",
+       " 7: 4079723}"
+      ]
+     },
+     "execution_count": 143,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dow_births"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Creating A More General Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 144,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def calc_counts(data, column, value_column=4):\n",
+    "    \"\"\"Sum one column of data, grouped by the values of another column.\n",
+    "\n",
+    "    data         -- list of rows, each row an indexable sequence\n",
+    "    column       -- index of the column whose values become the dict keys\n",
+    "    value_column -- index of the column that is summed; defaults to 4, the\n",
+    "                    births column, so existing two-argument calls are unchanged\n",
+    "\n",
+    "    Returns a dict mapping each distinct value in column to its total.\n",
+    "    \"\"\"\n",
+    "    sums_dict = {}\n",
+    "\n",
+    "    for row in data:\n",
+    "        col_value = row[column]\n",
+    "        amount = row[value_column]\n",
+    "        if col_value in sums_dict:\n",
+    "            sums_dict[col_value] = sums_dict[col_value] + amount\n",
+    "        else:\n",
+    "            sums_dict[col_value] = amount\n",
+    "    return sums_dict\n",
+    "\n",
+    "cdc_year_births = calc_counts(cdc_list, 0)\n",
+    "cdc_month_births = calc_counts(cdc_list, 1)\n",
+    "cdc_dom_births = calc_counts(cdc_list, 2)\n",
+    "cdc_dow_births = calc_counts(cdc_list, 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 145,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1994: 3952767,\n",
+       " 1995: 3899589,\n",
+       " 1996: 3891494,\n",
+       " 1997: 3880894,\n",
+       " 1998: 3941553,\n",
+       " 1999: 3959417,\n",
+       " 2000: 4058814,\n",
+       " 2001: 4025933,\n",
+       " 2002: 4021726,\n",
+       " 2003: 4089950}"
+      ]
+     },
+     "execution_count": 145,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_year_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 146,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 3232517,\n",
+       " 2: 3018140,\n",
+       " 3: 3322069,\n",
+       " 4: 3185314,\n",
+       " 5: 3350907,\n",
+       " 6: 3296530,\n",
+       " 7: 3498783,\n",
+       " 8: 3525858,\n",
+       " 9: 3439698,\n",
+       " 10: 3378814,\n",
+       " 11: 3171647,\n",
+       " 12: 3301860}"
+      ]
+     },
+     "execution_count": 146,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_month_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 147,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 1276557,\n",
+       " 2: 1288739,\n",
+       " 3: 1304499,\n",
+       " 4: 1288154,\n",
+       " 5: 1299953,\n",
+       " 6: 1304474,\n",
+       " 7: 1310459,\n",
+       " 8: 1312297,\n",
+       " 9: 1303292,\n",
+       " 10: 1320764,\n",
+       " 11: 1314361,\n",
+       " 12: 1318437,\n",
+       " 13: 1277684,\n",
+       " 14: 1320153,\n",
+       " 15: 1319171,\n",
+       " 16: 1315192,\n",
+       " 17: 1324953,\n",
+       " 18: 1326855,\n",
+       " 19: 1318727,\n",
+       " 20: 1324821,\n",
+       " 21: 1322897,\n",
+       " 22: 1317381,\n",
+       " 23: 1293290,\n",
+       " 24: 1288083,\n",
+       " 25: 1272116,\n",
+       " 26: 1284796,\n",
+       " 27: 1294395,\n",
+       " 28: 1307685,\n",
+       " 29: 1223161,\n",
+       " 30: 1202095,\n",
+       " 31: 746696}"
+      ]
+     },
+     "execution_count": 147,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dom_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 5789166,\n",
+       " 2: 6446196,\n",
+       " 3: 6322855,\n",
+       " 4: 6288429,\n",
+       " 5: 6233657,\n",
+       " 6: 4562111,\n",
+       " 7: 4079723}"
+      ]
+     },
+     "execution_count": 148,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dow_births"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python [conda env:envdq]",
+   "language": "python",
+   "name": "conda-env-envdq-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

+ 2 - 0
README.md

@@ -4,10 +4,12 @@ This repository is a series of notebooks that show solutions for the [projects](
 
 
 Of course, there are always going to be multiple ways to solve any one problem, so these notebooks just show one possible solution.
 Of course, there are always going to be multiple ways to solve any one problem, so these notebooks just show one possible solution.
 
 
+- [Guided Project: Explore U.S. Births](https://github.com/dataquestio/solutions/blob/master/Mission9Solutions.ipynb)
 - [Guided Project: Customizing Data Visualizations](https://github.com/dataquestio/solutions/blob/master/Mission103Solutions.ipynb)
 - [Guided Project: Customizing Data Visualizations](https://github.com/dataquestio/solutions/blob/master/Mission103Solutions.ipynb)
 - [Guided Project: Star Wars survey](https://github.com/dataquestio/solutions/blob/master/Mission201Solution.ipynb)
 - [Guided Project: Star Wars survey](https://github.com/dataquestio/solutions/blob/master/Mission201Solution.ipynb)
 - [Guided Project: Police killings](https://github.com/dataquestio/solutions/blob/master/Mission202Solution.ipynb)
 - [Guided Project: Police killings](https://github.com/dataquestio/solutions/blob/master/Mission202Solution.ipynb)
 - [Guided Project: Visualizing Pixar's Roller Coaster](https://github.com/dataquestio/solutions/blob/master/Mission205Solutions.ipynb)
 - [Guided Project: Visualizing Pixar's Roller Coaster](https://github.com/dataquestio/solutions/blob/master/Mission205Solutions.ipynb)
+- [Guided Project: Using Jupyter Notebook](https://github.com/dataquestio/solutions/blob/master/Mission207Solutions.ipynb)
 - [Guided Project: Analyzing movie reviews](https://github.com/dataquestio/solutions/blob/master/Mission209Solution.ipynb)
 - [Guided Project: Analyzing movie reviews](https://github.com/dataquestio/solutions/blob/master/Mission209Solution.ipynb)
 - [Guided Project: Winning Jeopardy](https://github.com/dataquestio/solutions/blob/master/Mission210Solution.ipynb)
 - [Guided Project: Winning Jeopardy](https://github.com/dataquestio/solutions/blob/master/Mission210Solution.ipynb)
 - [Guided Project: Predicting board game reviews](https://github.com/dataquestio/solutions/blob/master/Mission211Solution.ipynb)
 - [Guided Project: Predicting board game reviews](https://github.com/dataquestio/solutions/blob/master/Mission211Solution.ipynb)