|
@@ -0,0 +1,438 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "# Introduction To The Dataset"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 81,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Read the whole file, then split it into one string per line.\n",
+ "# The with-block closes the file handle as soon as the read completes.\n",
+ "with open(\"US_births_1994-2003_CDC_NCHS.csv\") as births_file:\n",
+ "    csv_list = births_file.read().split(\"\\n\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 82,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "['year,month,date_of_month,day_of_week,births',\n",
|
|
|
+ " '1994,1,1,6,8096',\n",
|
|
|
+ " '1994,1,2,7,7772',\n",
|
|
|
+ " '1994,1,3,1,10142',\n",
|
|
|
+ " '1994,1,4,2,11248',\n",
|
|
|
+ " '1994,1,5,3,11053',\n",
|
|
|
+ " '1994,1,6,4,11406',\n",
|
|
|
+ " '1994,1,7,5,11251',\n",
|
|
|
+ " '1994,1,8,6,8653',\n",
|
|
|
+ " '1994,1,9,7,7910']"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 82,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "csv_list[0:10]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Converting Data Into A List Of Lists"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 71,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def read_csv(filename):\n",
+ "    \"\"\"Load a headered, all-integer CSV file as a list of integer rows.\n",
+ "\n",
+ "    The first line is treated as a header and discarded. Every remaining\n",
+ "    non-blank line is split on commas and each field is converted with int().\n",
+ "\n",
+ "    Args:\n",
+ "        filename: path of the CSV file to read.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list holding one list of ints per data row, in file order.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: if a field cannot be parsed as an integer.\n",
+ "    \"\"\"\n",
+ "    # The context manager closes the file even if parsing fails afterwards.\n",
+ "    with open(filename) as csv_file:\n",
+ "        data_list = csv_file.read().split(\"\\n\")[1:]\n",
+ "\n",
+ "    final_list = []\n",
+ "    for row in data_list:\n",
+ "        # A trailing newline leaves an empty last entry; int('') would raise.\n",
+ "        if not row.strip():\n",
+ "            continue\n",
+ "        final_list.append([int(val_string) for val_string in row.split(\",\")])\n",
+ "    return final_list\n",
|
|
|
+ " \n",
|
|
|
+ "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 85,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "[[1994, 1, 1, 6, 8096],\n",
|
|
|
+ " [1994, 1, 2, 7, 7772],\n",
|
|
|
+ " [1994, 1, 3, 1, 10142],\n",
|
|
|
+ " [1994, 1, 4, 2, 11248],\n",
|
|
|
+ " [1994, 1, 5, 3, 11053],\n",
|
|
|
+ " [1994, 1, 6, 4, 11406],\n",
|
|
|
+ " [1994, 1, 7, 5, 11251],\n",
|
|
|
+ " [1994, 1, 8, 6, 8653],\n",
|
|
|
+ " [1994, 1, 9, 7, 7910],\n",
|
|
|
+ " [1994, 1, 10, 1, 10498]]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 85,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "cdc_list[0:10]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Calculating Number Of Births Each Month"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 73,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def month_births(data):\n",
+ "    \"\"\"Total the births column for each distinct month value.\n",
+ "\n",
+ "    Rows are indexed positionally: row[1] is the grouping key and row[4]\n",
+ "    is the amount added to that key's running total.\n",
+ "\n",
+ "    Returns a dict mapping each row[1] value to the sum of its row[4] values.\n",
+ "    \"\"\"\n",
+ "    totals = {}\n",
+ "    for record in data:\n",
+ "        key, amount = record[1], record[4]\n",
+ "        if key not in totals:\n",
+ "            # The first occurrence of a key seeds its total.\n",
+ "            totals[key] = amount\n",
+ "            continue\n",
+ "        totals[key] = totals[key] + amount\n",
+ "    return totals\n",
|
|
|
+ " \n",
|
|
|
+ "cdc_month_births = month_births(cdc_list)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 74,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "{1: 3232517,\n",
|
|
|
+ " 2: 3018140,\n",
|
|
|
+ " 3: 3322069,\n",
|
|
|
+ " 4: 3185314,\n",
|
|
|
+ " 5: 3350907,\n",
|
|
|
+ " 6: 3296530,\n",
|
|
|
+ " 7: 3498783,\n",
|
|
|
+ " 8: 3525858,\n",
|
|
|
+ " 9: 3439698,\n",
|
|
|
+ " 10: 3378814,\n",
|
|
|
+ " 11: 3171647,\n",
|
|
|
+ " 12: 3301860}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 74,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "cdc_month_births"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Calculating Number Of Births Each Day Of Week"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 86,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": true
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def dow_births(data):\n",
+ "    \"\"\"Total the births column for each distinct day-of-week value.\n",
+ "\n",
+ "    Rows are indexed positionally: row[3] is the grouping key and row[4]\n",
+ "    is the amount accumulated under that key.\n",
+ "\n",
+ "    Returns a dict mapping each row[3] value to the sum of its row[4] values.\n",
+ "    \"\"\"\n",
+ "    totals = {}\n",
+ "    for record in data:\n",
+ "        weekday, count = record[3], record[4]\n",
+ "        try:\n",
+ "            totals[weekday] = totals[weekday] + count\n",
+ "        except KeyError:\n",
+ "            # Key not seen before: start its total at this row's count.\n",
+ "            totals[weekday] = count\n",
+ "    return totals\n",
|
|
|
+ " \n",
|
|
|
+ "cdc_dow_births = dow_births(cdc_list)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 87,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "{1: 5789166,\n",
|
|
|
+ " 2: 6446196,\n",
|
|
|
+ " 3: 6322855,\n",
|
|
|
+ " 4: 6288429,\n",
|
|
|
+ " 5: 6233657,\n",
|
|
|
+ " 6: 4562111,\n",
|
|
|
+ " 7: 4079723}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 87,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "cdc_dow_births"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Creating A More General Function"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 101,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def calc_counts(data, col_num, value_col=4):\n",
+ "    \"\"\"Sum one column of `data`, grouped by the values of another column.\n",
+ "\n",
+ "    Args:\n",
+ "        data: iterable of rows that support positional indexing.\n",
+ "        col_num: index of the column whose distinct values become dict keys.\n",
+ "        value_col: index of the column that is summed for each key. The\n",
+ "            default of 4 keeps the earlier fixed behaviour of summing row[4].\n",
+ "\n",
+ "    Returns:\n",
+ "        A dict mapping each distinct row[col_num] value to the total of\n",
+ "        row[value_col] over the rows that share it.\n",
+ "    \"\"\"\n",
+ "    sums_dict = {}\n",
+ "    for row in data:\n",
+ "        col_value = row[col_num]\n",
+ "        amount = row[value_col]\n",
+ "        if col_value in sums_dict:\n",
+ "            sums_dict[col_value] = sums_dict[col_value] + amount\n",
+ "        else:\n",
+ "            sums_dict[col_value] = amount\n",
+ "    return sums_dict\n",
|
|
|
+ "\n",
|
|
|
+ "# Columns 0-3 are year, month, date_of_month and day_of_week; total births for each.\n",
+ "cdc_year_births, cdc_month_births, cdc_dom_births, cdc_dow_births = (\n",
+ "    calc_counts(cdc_list, column) for column in range(4)\n",
+ ")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 102,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "{1994: 3952767,\n",
|
|
|
+ " 1995: 3899589,\n",
|
|
|
+ " 1996: 3891494,\n",
|
|
|
+ " 1997: 3880894,\n",
|
|
|
+ " 1998: 3941553,\n",
|
|
|
+ " 1999: 3959417,\n",
|
|
|
+ " 2000: 4058814,\n",
|
|
|
+ " 2001: 4025933,\n",
|
|
|
+ " 2002: 4021726,\n",
|
|
|
+ " 2003: 4089950}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 102,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "cdc_year_births"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 103,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "{1: 3232517,\n",
|
|
|
+ " 2: 3018140,\n",
|
|
|
+ " 3: 3322069,\n",
|
|
|
+ " 4: 3185314,\n",
|
|
|
+ " 5: 3350907,\n",
|
|
|
+ " 6: 3296530,\n",
|
|
|
+ " 7: 3498783,\n",
|
|
|
+ " 8: 3525858,\n",
|
|
|
+ " 9: 3439698,\n",
|
|
|
+ " 10: 3378814,\n",
|
|
|
+ " 11: 3171647,\n",
|
|
|
+ " 12: 3301860}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 103,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "cdc_month_births"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 104,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false,
|
|
|
+ "scrolled": true
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "{1: 1276557,\n",
|
|
|
+ " 2: 1288739,\n",
|
|
|
+ " 3: 1304499,\n",
|
|
|
+ " 4: 1288154,\n",
|
|
|
+ " 5: 1299953,\n",
|
|
|
+ " 6: 1304474,\n",
|
|
|
+ " 7: 1310459,\n",
|
|
|
+ " 8: 1312297,\n",
|
|
|
+ " 9: 1303292,\n",
|
|
|
+ " 10: 1320764,\n",
|
|
|
+ " 11: 1314361,\n",
|
|
|
+ " 12: 1318437,\n",
|
|
|
+ " 13: 1277684,\n",
|
|
|
+ " 14: 1320153,\n",
|
|
|
+ " 15: 1319171,\n",
|
|
|
+ " 16: 1315192,\n",
|
|
|
+ " 17: 1324953,\n",
|
|
|
+ " 18: 1326855,\n",
|
|
|
+ " 19: 1318727,\n",
|
|
|
+ " 20: 1324821,\n",
|
|
|
+ " 21: 1322897,\n",
|
|
|
+ " 22: 1317381,\n",
|
|
|
+ " 23: 1293290,\n",
|
|
|
+ " 24: 1288083,\n",
|
|
|
+ " 25: 1272116,\n",
|
|
|
+ " 26: 1284796,\n",
|
|
|
+ " 27: 1294395,\n",
|
|
|
+ " 28: 1307685,\n",
|
|
|
+ " 29: 1223161,\n",
|
|
|
+ " 30: 1202095,\n",
|
|
|
+ " 31: 746696}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 104,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "cdc_dom_births"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 105,
|
|
|
+ "metadata": {
|
|
|
+ "collapsed": false
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "{1: 5789166,\n",
|
|
|
+ " 2: 6446196,\n",
|
|
|
+ " 3: 6322855,\n",
|
|
|
+ " 4: 6288429,\n",
|
|
|
+ " 5: 6233657,\n",
|
|
|
+ " 6: 4562111,\n",
|
|
|
+ " 7: 4079723}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 105,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "cdc_dow_births"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "anaconda-cloud": {},
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python [conda env:envdq]",
|
|
|
+ "language": "python",
|
|
|
+ "name": "conda-env-envdq-py"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.4.5"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 1
|
|
|
+}
|