Queer European MD passionate about IT
Browse Source

added mission 9

Srini Kadamati 8 years ago
parent
commit
2c644dbfe4
1 changed files with 438 additions and 0 deletions
  1. 438 0
      Mission9Solutions.ipynb

+ 438 - 0
Mission9Solutions.ipynb

@@ -0,0 +1,438 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "# Introduction To The Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Read the raw CSV text and split it into lines; the context manager closes\n",
+    "# the file handle as soon as the text has been read.\n",
+    "with open(\"US_births_1994-2003_CDC_NCHS.csv\") as csv_file:\n",
+    "    csv_list = csv_file.read().split(\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['year,month,date_of_month,day_of_week,births',\n",
+       " '1994,1,1,6,8096',\n",
+       " '1994,1,2,7,7772',\n",
+       " '1994,1,3,1,10142',\n",
+       " '1994,1,4,2,11248',\n",
+       " '1994,1,5,3,11053',\n",
+       " '1994,1,6,4,11406',\n",
+       " '1994,1,7,5,11251',\n",
+       " '1994,1,8,6,8653',\n",
+       " '1994,1,9,7,7910']"
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "csv_list[0:10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Converting Data Into A List Of Lists"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def read_csv(filename):\n",
+    "    \"\"\"Read a CSV file of integers into a list of lists, dropping the header row.\n",
+    "\n",
+    "    Args:\n",
+    "        filename: path of the CSV file. The first line is discarded as a\n",
+    "            header; every field of the remaining lines is parsed with int().\n",
+    "\n",
+    "    Returns:\n",
+    "        A list holding one list of ints per non-blank data line.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if a field of a data line is not an integer literal.\n",
+    "    \"\"\"\n",
+    "    # The context manager closes the file handle once the text is read.\n",
+    "    with open(filename) as csv_file:\n",
+    "        data_list = csv_file.read().split(\"\\n\")[1:]\n",
+    "\n",
+    "    final_list = list()\n",
+    "    for row in data_list:\n",
+    "        # A trailing newline leaves an empty last element, which int()\n",
+    "        # cannot parse, so blank lines are skipped instead of crashing.\n",
+    "        if not row.strip():\n",
+    "            continue\n",
+    "        final_list.append([int(val_string) for val_string in row.split(\",\")])\n",
+    "    return final_list\n",
+    "        \n",
+    "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[[1994, 1, 1, 6, 8096],\n",
+       " [1994, 1, 2, 7, 7772],\n",
+       " [1994, 1, 3, 1, 10142],\n",
+       " [1994, 1, 4, 2, 11248],\n",
+       " [1994, 1, 5, 3, 11053],\n",
+       " [1994, 1, 6, 4, 11406],\n",
+       " [1994, 1, 7, 5, 11251],\n",
+       " [1994, 1, 8, 6, 8653],\n",
+       " [1994, 1, 9, 7, 7910],\n",
+       " [1994, 1, 10, 1, 10498]]"
+      ]
+     },
+     "execution_count": 85,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_list[0:10]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Calculating Number Of Births Each Month"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def month_births(data):\n",
+    "    \"\"\"Sum births per month.\n",
+    "\n",
+    "    Each row is indexed positionally: row[1] is used as the key and\n",
+    "    row[4] as the amount added to that key's running total.\n",
+    "    Returns a dict mapping every distinct row[1] value to its total.\n",
+    "    \"\"\"\n",
+    "    totals = {}\n",
+    "    for record in data:\n",
+    "        month_key, birth_count = record[1], record[4]\n",
+    "        if month_key not in totals:\n",
+    "            # First time this month shows up: start its total.\n",
+    "            totals[month_key] = birth_count\n",
+    "            continue\n",
+    "        totals[month_key] = totals[month_key] + birth_count\n",
+    "    return totals\n",
+    "    \n",
+    "cdc_month_births = month_births(cdc_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 3232517,\n",
+       " 2: 3018140,\n",
+       " 3: 3322069,\n",
+       " 4: 3185314,\n",
+       " 5: 3350907,\n",
+       " 6: 3296530,\n",
+       " 7: 3498783,\n",
+       " 8: 3525858,\n",
+       " 9: 3439698,\n",
+       " 10: 3378814,\n",
+       " 11: 3171647,\n",
+       " 12: 3301860}"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_month_births"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Calculating Number Of Births Each Day Of Week"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def dow_births(data):\n",
+    "    \"\"\"Sum births per day of week.\n",
+    "\n",
+    "    Each row is indexed positionally: row[3] is used as the key and\n",
+    "    row[4] as the amount added to that key's running total.\n",
+    "    Returns a dict mapping every distinct row[3] value to its total.\n",
+    "    \"\"\"\n",
+    "    totals = {}\n",
+    "    for record in data:\n",
+    "        weekday = record[3]\n",
+    "        birth_count = record[4]\n",
+    "        try:\n",
+    "            running = totals[weekday]\n",
+    "        except KeyError:\n",
+    "            # First row seen for this weekday: start its total.\n",
+    "            totals[weekday] = birth_count\n",
+    "        else:\n",
+    "            totals[weekday] = running + birth_count\n",
+    "    return totals\n",
+    "    \n",
+    "cdc_dow_births = dow_births(cdc_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 5789166,\n",
+       " 2: 6446196,\n",
+       " 3: 6322855,\n",
+       " 4: 6288429,\n",
+       " 5: 6233657,\n",
+       " 6: 4562111,\n",
+       " 7: 4079723}"
+      ]
+     },
+     "execution_count": 87,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dow_births"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Creating A More General Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def calc_counts(data, col_num, value_col=4):\n",
+    "    \"\"\"Total one column's values, grouped by the value found in another column.\n",
+    "\n",
+    "    Args:\n",
+    "        data: iterable of rows that support positional indexing.\n",
+    "        col_num: index of the column whose values become the dict keys.\n",
+    "        value_col: index of the column that is summed. Defaults to 4, the\n",
+    "            births column of the CDC data, so existing two-argument calls\n",
+    "            behave exactly as before.\n",
+    "\n",
+    "    Returns:\n",
+    "        A dict mapping each distinct value in column col_num to the sum of\n",
+    "        column value_col over the rows carrying that value.\n",
+    "    \"\"\"\n",
+    "    sums_dict = dict()\n",
+    "\n",
+    "    for row in data:\n",
+    "        col_value = row[col_num]\n",
+    "        amount = row[value_col]\n",
+    "        if col_value in sums_dict:\n",
+    "            sums_dict[col_value] = sums_dict[col_value] + amount\n",
+    "        else:\n",
+    "            sums_dict[col_value] = amount\n",
+    "    return sums_dict\n",
+    "\n",
+    "cdc_year_births = calc_counts(cdc_list, 0)\n",
+    "cdc_month_births = calc_counts(cdc_list, 1)\n",
+    "cdc_dom_births = calc_counts(cdc_list, 2)\n",
+    "cdc_dow_births = calc_counts(cdc_list, 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1994: 3952767,\n",
+       " 1995: 3899589,\n",
+       " 1996: 3891494,\n",
+       " 1997: 3880894,\n",
+       " 1998: 3941553,\n",
+       " 1999: 3959417,\n",
+       " 2000: 4058814,\n",
+       " 2001: 4025933,\n",
+       " 2002: 4021726,\n",
+       " 2003: 4089950}"
+      ]
+     },
+     "execution_count": 102,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_year_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 3232517,\n",
+       " 2: 3018140,\n",
+       " 3: 3322069,\n",
+       " 4: 3185314,\n",
+       " 5: 3350907,\n",
+       " 6: 3296530,\n",
+       " 7: 3498783,\n",
+       " 8: 3525858,\n",
+       " 9: 3439698,\n",
+       " 10: 3378814,\n",
+       " 11: 3171647,\n",
+       " 12: 3301860}"
+      ]
+     },
+     "execution_count": 103,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_month_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 1276557,\n",
+       " 2: 1288739,\n",
+       " 3: 1304499,\n",
+       " 4: 1288154,\n",
+       " 5: 1299953,\n",
+       " 6: 1304474,\n",
+       " 7: 1310459,\n",
+       " 8: 1312297,\n",
+       " 9: 1303292,\n",
+       " 10: 1320764,\n",
+       " 11: 1314361,\n",
+       " 12: 1318437,\n",
+       " 13: 1277684,\n",
+       " 14: 1320153,\n",
+       " 15: 1319171,\n",
+       " 16: 1315192,\n",
+       " 17: 1324953,\n",
+       " 18: 1326855,\n",
+       " 19: 1318727,\n",
+       " 20: 1324821,\n",
+       " 21: 1322897,\n",
+       " 22: 1317381,\n",
+       " 23: 1293290,\n",
+       " 24: 1288083,\n",
+       " 25: 1272116,\n",
+       " 26: 1284796,\n",
+       " 27: 1294395,\n",
+       " 28: 1307685,\n",
+       " 29: 1223161,\n",
+       " 30: 1202095,\n",
+       " 31: 746696}"
+      ]
+     },
+     "execution_count": 104,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dom_births"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: 5789166,\n",
+       " 2: 6446196,\n",
+       " 3: 6322855,\n",
+       " 4: 6288429,\n",
+       " 5: 6233657,\n",
+       " 6: 4562111,\n",
+       " 7: 4079723}"
+      ]
+     },
+     "execution_count": 105,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cdc_dow_births"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python [conda env:envdq]",
+   "language": "python",
+   "name": "conda-env-envdq-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}