{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "# Introduction To The Dataset" ] }, { "cell_type": "code", "execution_count": 136, "metadata": { "collapsed": false }, "outputs": [], "source": [ "csv_list = open(\"US_births_1994-2003_CDC_NCHS.csv\").read().split(\"\\n\")" ] }, { "cell_type": "code", "execution_count": 137, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['year,month,date_of_month,day_of_week,births',\n", " '1994,1,1,6,8096',\n", " '1994,1,2,7,7772',\n", " '1994,1,3,1,10142',\n", " '1994,1,4,2,11248',\n", " '1994,1,5,3,11053',\n", " '1994,1,6,4,11406',\n", " '1994,1,7,5,11251',\n", " '1994,1,8,6,8653',\n", " '1994,1,9,7,7910']" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "csv_list[0:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Converting Data Into A List Of Lists" ] }, { "cell_type": "code", "execution_count": 138, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def read_csv(filename):\n", " string_data = open(filename).read()\n", " string_list = string_data.split(\"\\n\")[1:]\n", " final_list = []\n", " \n", " for row in string_list:\n", " string_fields = row.split(\",\")\n", " int_fields = []\n", " for value in string_fields:\n", " int_fields.append(int(value))\n", " final_list.append(int_fields)\n", " return final_list\n", " \n", "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")" ] }, { "cell_type": "code", "execution_count": 139, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[[1994, 1, 1, 6, 8096],\n", " [1994, 1, 2, 7, 7772],\n", " [1994, 1, 3, 1, 10142],\n", " [1994, 1, 4, 2, 11248],\n", " [1994, 1, 5, 3, 11053],\n", " [1994, 1, 6, 4, 11406],\n", " [1994, 1, 7, 5, 11251],\n", " [1994, 1, 8, 6, 8653],\n", " [1994, 1, 9, 7, 7910],\n", " [1994, 1, 10, 1, 10498]]" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdc_list[0:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating Number Of Births Each Month" ] }, { "cell_type": "code", "execution_count": 140, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def read_csv(filename):\n", " string_data = open(filename).read()\n", " string_list = string_data.split(\"\\n\")[1:]\n", " final_list = []\n", " \n", " for row in string_list:\n", " string_fields = row.split(\",\")\n", " int_fields = []\n", " for value in string_fields:\n", " int_fields.append(int(value))\n", " final_list.append(int_fields)\n", " return final_list\n", " \n", "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")\n", "\n", "\n", "def month_births(data):\n", " births_per_month = {}\n", " \n", " for row in data:\n", " month = row[1]\n", " births = row[4]\n", " if month in births_per_month:\n", " births_per_month[month] = births_per_month[month] + births\n", " else:\n", " births_per_month[month] = births\n", " return births_per_month\n", " \n", "cdc_month_births = month_births(cdc_list)" ] }, { "cell_type": "code", "execution_count": 141, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{1: 3232517,\n", " 2: 3018140,\n", " 3: 3322069,\n", " 4: 3185314,\n", " 5: 3350907,\n", " 6: 3296530,\n", " 7: 3498783,\n", " 8: 3525858,\n", " 9: 3439698,\n", " 10: 3378814,\n", " 11: 3171647,\n", " 12: 3301860}" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdc_month_births" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating Number Of Births Each Day Of Week" ] }, { "cell_type": "code", "execution_count": 142, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def dow_births(data):\n", " births_per_dow = {}\n", " \n", " for row in data:\n", " dow = row[3]\n", " births = row[4]\n", " if dow in births_per_dow:\n", " births_per_dow[dow] = births_per_dow[dow] + births\n", " else:\n", " births_per_dow[dow] = births\n", " return births_per_dow\n", " \n", "cdc_dow_births = dow_births(cdc_list)" ] }, { "cell_type": "code", "execution_count": 143, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{1: 5789166,\n", " 2: 6446196,\n", " 3: 6322855,\n", " 4: 6288429,\n", " 5: 6233657,\n", " 6: 4562111,\n", " 7: 4079723}" ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdc_dow_births" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Creating A More General Function" ] }, { "cell_type": "code", "execution_count": 144, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def calc_counts(data, column):\n", " sums_dict = {}\n", " \n", " for row in data:\n", " col_value = row[column]\n", " births = row[4]\n", " if col_value in sums_dict:\n", " sums_dict[col_value] = sums_dict[col_value] + births\n", " else:\n", " sums_dict[col_value] = births\n", " return sums_dict\n", "\n", "cdc_year_births = calc_counts(cdc_list, 0)\n", "cdc_month_births = calc_counts(cdc_list, 1)\n", "cdc_dom_births = calc_counts(cdc_list, 2)\n", "cdc_dow_births = calc_counts(cdc_list, 3)" ] }, { "cell_type": "code", "execution_count": 145, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{1994: 3952767,\n", " 1995: 3899589,\n", " 1996: 3891494,\n", " 1997: 3880894,\n", " 1998: 3941553,\n", " 1999: 3959417,\n", " 2000: 4058814,\n", " 2001: 4025933,\n", " 2002: 4021726,\n", " 2003: 4089950}" ] }, "execution_count": 145, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdc_year_births" ] }, { "cell_type": "code", "execution_count": 146, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{1: 3232517,\n", " 2: 3018140,\n", " 3: 3322069,\n", " 4: 3185314,\n", " 5: 3350907,\n", " 6: 3296530,\n", " 7: 3498783,\n", " 8: 3525858,\n", " 9: 3439698,\n", " 10: 3378814,\n", " 11: 3171647,\n", " 12: 3301860}" ] }, "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdc_month_births" ] }, { "cell_type": "code", "execution_count": 147, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{1: 1276557,\n", " 2: 1288739,\n", " 3: 1304499,\n", " 4: 1288154,\n", " 5: 1299953,\n", " 6: 1304474,\n", " 7: 1310459,\n", " 8: 1312297,\n", " 9: 1303292,\n", " 10: 1320764,\n", " 11: 1314361,\n", " 12: 1318437,\n", " 13: 1277684,\n", " 14: 1320153,\n", " 15: 1319171,\n", " 16: 1315192,\n", " 17: 1324953,\n", " 18: 1326855,\n", " 19: 1318727,\n", " 20: 1324821,\n", " 21: 1322897,\n", " 22: 1317381,\n", " 23: 1293290,\n", " 24: 1288083,\n", " 25: 1272116,\n", " 26: 1284796,\n", " 27: 1294395,\n", " 28: 1307685,\n", " 29: 1223161,\n", " 30: 1202095,\n", " 31: 746696}" ] }, "execution_count": 147, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdc_dom_births" ] }, { "cell_type": "code", "execution_count": 148, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{1: 5789166,\n", " 2: 6446196,\n", " 3: 6322855,\n", " 4: 6288429,\n", " 5: 6233657,\n", " 6: 4562111,\n", " 7: 4079723}" ] }, "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdc_dow_births" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda env:envdq]", "language": "python", "name": "conda-env-envdq-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.5" } }, "nbformat": 4, "nbformat_minor": 1 }