{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# Introduction To The Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "csv_list = open(\"US_births_1994-2003_CDC_NCHS.csv\").read().split(\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['year,month,date_of_month,day_of_week,births',\n",
       " '1994,1,1,6,8096',\n",
       " '1994,1,2,7,7772',\n",
       " '1994,1,3,1,10142',\n",
       " '1994,1,4,2,11248',\n",
       " '1994,1,5,3,11053',\n",
       " '1994,1,6,4,11406',\n",
       " '1994,1,7,5,11251',\n",
       " '1994,1,8,6,8653',\n",
       " '1994,1,9,7,7910']"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "csv_list[0:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Converting Data Into A List Of Lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def read_csv(filename):\n",
    "    string_data = open(filename).read()\n",
    "    string_list = string_data.split(\"\\n\")[1:]\n",
    "    final_list = []\n",
    "    \n",
    "    for row in string_list:\n",
    "        string_fields = row.split(\",\")\n",
    "        int_fields = []\n",
    "        for value in string_fields:\n",
    "            int_fields.append(int(value))\n",
    "        final_list.append(int_fields)\n",
    "    return final_list\n",
    "        \n",
    "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[1994, 1, 1, 6, 8096],\n",
       " [1994, 1, 2, 7, 7772],\n",
       " [1994, 1, 3, 1, 10142],\n",
       " [1994, 1, 4, 2, 11248],\n",
       " [1994, 1, 5, 3, 11053],\n",
       " [1994, 1, 6, 4, 11406],\n",
       " [1994, 1, 7, 5, 11251],\n",
       " [1994, 1, 8, 6, 8653],\n",
       " [1994, 1, 9, 7, 7910],\n",
       " [1994, 1, 10, 1, 10498]]"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdc_list[0:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Calculating Number Of Births Each Month"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def read_csv(filename):\n",
    "    string_data = open(filename).read()\n",
    "    string_list = string_data.split(\"\\n\")[1:]\n",
    "    final_list = []\n",
    "    \n",
    "    for row in string_list:\n",
    "        string_fields = row.split(\",\")\n",
    "        int_fields = []\n",
    "        for value in string_fields:\n",
    "            int_fields.append(int(value))\n",
    "        final_list.append(int_fields)\n",
    "    return final_list\n",
    "        \n",
    "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")\n",
    "\n",
    "\n",
    "def month_births(data):\n",
    "    births_per_month = {}\n",
    "    \n",
    "    for row in data:\n",
    "        month = row[1]\n",
    "        births = row[4]\n",
    "        if month in births_per_month:\n",
    "            births_per_month[month] = births_per_month[month] + births\n",
    "        else:\n",
    "            births_per_month[month] = births\n",
    "    return births_per_month\n",
    "    \n",
    "cdc_month_births = month_births(cdc_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: 3232517,\n",
       " 2: 3018140,\n",
       " 3: 3322069,\n",
       " 4: 3185314,\n",
       " 5: 3350907,\n",
       " 6: 3296530,\n",
       " 7: 3498783,\n",
       " 8: 3525858,\n",
       " 9: 3439698,\n",
       " 10: 3378814,\n",
       " 11: 3171647,\n",
       " 12: 3301860}"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdc_month_births"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Calculating Number Of Births Each Day Of Week"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def dow_births(data):\n",
    "    births_per_dow = {}\n",
    "    \n",
    "    for row in data:\n",
    "        dow = row[3]\n",
    "        births = row[4]\n",
    "        if dow in births_per_dow:\n",
    "            births_per_dow[dow] = births_per_dow[dow] + births\n",
    "        else:\n",
    "            births_per_dow[dow] = births\n",
    "    return births_per_dow\n",
    "    \n",
    "cdc_dow_births = dow_births(cdc_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: 5789166,\n",
       " 2: 6446196,\n",
       " 3: 6322855,\n",
       " 4: 6288429,\n",
       " 5: 6233657,\n",
       " 6: 4562111,\n",
       " 7: 4079723}"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdc_dow_births"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating A More General Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def calc_counts(data, column):\n",
    "    sums_dict = {}\n",
    "    \n",
    "    for row in data:\n",
    "        col_value = row[column]\n",
    "        births = row[4]\n",
    "        if col_value in sums_dict:\n",
    "            sums_dict[col_value] = sums_dict[col_value] + births\n",
    "        else:\n",
    "            sums_dict[col_value] = births\n",
    "    return sums_dict\n",
    "\n",
    "cdc_year_births = calc_counts(cdc_list, 0)\n",
    "cdc_month_births = calc_counts(cdc_list, 1)\n",
    "cdc_dom_births = calc_counts(cdc_list, 2)\n",
    "cdc_dow_births = calc_counts(cdc_list, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1994: 3952767,\n",
       " 1995: 3899589,\n",
       " 1996: 3891494,\n",
       " 1997: 3880894,\n",
       " 1998: 3941553,\n",
       " 1999: 3959417,\n",
       " 2000: 4058814,\n",
       " 2001: 4025933,\n",
       " 2002: 4021726,\n",
       " 2003: 4089950}"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdc_year_births"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: 3232517,\n",
       " 2: 3018140,\n",
       " 3: 3322069,\n",
       " 4: 3185314,\n",
       " 5: 3350907,\n",
       " 6: 3296530,\n",
       " 7: 3498783,\n",
       " 8: 3525858,\n",
       " 9: 3439698,\n",
       " 10: 3378814,\n",
       " 11: 3171647,\n",
       " 12: 3301860}"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdc_month_births"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: 1276557,\n",
       " 2: 1288739,\n",
       " 3: 1304499,\n",
       " 4: 1288154,\n",
       " 5: 1299953,\n",
       " 6: 1304474,\n",
       " 7: 1310459,\n",
       " 8: 1312297,\n",
       " 9: 1303292,\n",
       " 10: 1320764,\n",
       " 11: 1314361,\n",
       " 12: 1318437,\n",
       " 13: 1277684,\n",
       " 14: 1320153,\n",
       " 15: 1319171,\n",
       " 16: 1315192,\n",
       " 17: 1324953,\n",
       " 18: 1326855,\n",
       " 19: 1318727,\n",
       " 20: 1324821,\n",
       " 21: 1322897,\n",
       " 22: 1317381,\n",
       " 23: 1293290,\n",
       " 24: 1288083,\n",
       " 25: 1272116,\n",
       " 26: 1284796,\n",
       " 27: 1294395,\n",
       " 28: 1307685,\n",
       " 29: 1223161,\n",
       " 30: 1202095,\n",
       " 31: 746696}"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdc_dom_births"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: 5789166,\n",
       " 2: 6446196,\n",
       " 3: 6322855,\n",
       " 4: 6288429,\n",
       " 5: 6233657,\n",
       " 6: 4562111,\n",
       " 7: 4079723}"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdc_dow_births"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:envdq]",
   "language": "python",
   "name": "conda-env-envdq-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}