|
@@ -11,7 +11,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 81,
|
|
|
+ "execution_count": 136,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -22,7 +22,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 82,
|
|
|
+ "execution_count": 137,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -42,7 +42,7 @@
|
|
|
" '1994,1,9,7,7910']"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 82,
|
|
|
+ "execution_count": 137,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -60,23 +60,23 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 71,
|
|
|
+ "execution_count": 138,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"def read_csv(filename):\n",
|
|
|
- " data_string = open(filename).read()\n",
|
|
|
- " data_list = data_string.split(\"\\n\")[1:]\n",
|
|
|
- " final_list = list()\n",
|
|
|
+ " string_data = open(filename).read()\n",
|
|
|
+ " string_list = string_data.split(\"\\n\")[1:]\n",
|
|
|
+ " final_list = []\n",
|
|
|
" \n",
|
|
|
- " for row in data_list:\n",
|
|
|
- " row_list = row.split(\",\")\n",
|
|
|
- " int_row_list = list()\n",
|
|
|
- " for val_string in row_list:\n",
|
|
|
- " int_row_list.append(int(val_string))\n",
|
|
|
- " final_list.append(int_row_list)\n",
|
|
|
+ " for row in string_list:\n",
|
|
|
+ " string_fields = row.split(\",\")\n",
|
|
|
+ " int_fields = []\n",
|
|
|
+ " for value in string_fields:\n",
|
|
|
+ " int_fields.append(int(value))\n",
|
|
|
+ " final_list.append(int_fields)\n",
|
|
|
" return final_list\n",
|
|
|
" \n",
|
|
|
"cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")"
|
|
@@ -84,7 +84,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 85,
|
|
|
+ "execution_count": 139,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -104,7 +104,7 @@
|
|
|
" [1994, 1, 10, 1, 10498]]"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 85,
|
|
|
+ "execution_count": 139,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -122,30 +122,46 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 73,
|
|
|
+ "execution_count": 140,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
+ "def read_csv(filename):\n",
|
|
|
+ " string_data = open(filename).read()\n",
|
|
|
+ " string_list = string_data.split(\"\\n\")[1:]\n",
|
|
|
+ " final_list = []\n",
|
|
|
+ " \n",
|
|
|
+ " for row in string_list:\n",
|
|
|
+ " string_fields = row.split(\",\")\n",
|
|
|
+ " int_fields = []\n",
|
|
|
+ " for value in string_fields:\n",
|
|
|
+ " int_fields.append(int(value))\n",
|
|
|
+ " final_list.append(int_fields)\n",
|
|
|
+ " return final_list\n",
|
|
|
+ " \n",
|
|
|
+ "cdc_list = read_csv(\"US_births_1994-2003_CDC_NCHS.csv\")\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
"def month_births(data):\n",
|
|
|
- " month_sums = dict()\n",
|
|
|
+ " births_per_month = {}\n",
|
|
|
" \n",
|
|
|
" for row in data:\n",
|
|
|
" month = row[1]\n",
|
|
|
" births = row[4]\n",
|
|
|
- " if month in month_sums:\n",
|
|
|
- " month_sums[month] = month_sums[month] + births\n",
|
|
|
+ " if month in births_per_month:\n",
|
|
|
+ " births_per_month[month] = births_per_month[month] + births\n",
|
|
|
" else:\n",
|
|
|
- " month_sums[month] = births\n",
|
|
|
- " return month_sums\n",
|
|
|
+ " births_per_month[month] = births\n",
|
|
|
+ " return births_per_month\n",
|
|
|
" \n",
|
|
|
"cdc_month_births = month_births(cdc_list)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 74,
|
|
|
+ "execution_count": 141,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -167,7 +183,7 @@
|
|
|
" 12: 3301860}"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 74,
|
|
|
+ "execution_count": 141,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -185,32 +201,33 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 86,
|
|
|
+ "execution_count": 142,
|
|
|
"metadata": {
|
|
|
"collapsed": true
|
|
|
},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"def dow_births(data):\n",
|
|
|
- " dow_sums = dict()\n",
|
|
|
+ " births_per_dow = {}\n",
|
|
|
" \n",
|
|
|
" for row in data:\n",
|
|
|
" dow = row[3]\n",
|
|
|
" births = row[4]\n",
|
|
|
- " if dow in dow_sums:\n",
|
|
|
- " dow_sums[dow] = dow_sums[dow] + births\n",
|
|
|
+ " if dow in births_per_dow:\n",
|
|
|
+ " births_per_dow[dow] = births_per_dow[dow] + births\n",
|
|
|
" else:\n",
|
|
|
- " dow_sums[dow] = births\n",
|
|
|
- " return dow_sums\n",
|
|
|
+ " births_per_dow[dow] = births\n",
|
|
|
+ " return births_per_dow\n",
|
|
|
" \n",
|
|
|
"cdc_dow_births = dow_births(cdc_list)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 87,
|
|
|
+ "execution_count": 143,
|
|
|
"metadata": {
|
|
|
- "collapsed": false
|
|
|
+ "collapsed": false,
|
|
|
+ "scrolled": true
|
|
|
},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -225,7 +242,7 @@
|
|
|
" 7: 4079723}"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 87,
|
|
|
+ "execution_count": 143,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -243,17 +260,17 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 101,
|
|
|
+ "execution_count": 144,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "def calc_counts(data, col_num):\n",
|
|
|
- " sums_dict = dict()\n",
|
|
|
+ "def calc_counts(data, column):\n",
|
|
|
+ " sums_dict = {}\n",
|
|
|
" \n",
|
|
|
" for row in data:\n",
|
|
|
- " col_value = row[col_num]\n",
|
|
|
+ " col_value = row[column]\n",
|
|
|
" births = row[4]\n",
|
|
|
" if col_value in sums_dict:\n",
|
|
|
" sums_dict[col_value] = sums_dict[col_value] + births\n",
|
|
@@ -269,7 +286,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 102,
|
|
|
+ "execution_count": 145,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -289,7 +306,7 @@
|
|
|
" 2003: 4089950}"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 102,
|
|
|
+ "execution_count": 145,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -300,7 +317,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 103,
|
|
|
+ "execution_count": 146,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -322,7 +339,7 @@
|
|
|
" 12: 3301860}"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 103,
|
|
|
+ "execution_count": 146,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -333,7 +350,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 104,
|
|
|
+ "execution_count": 147,
|
|
|
"metadata": {
|
|
|
"collapsed": false,
|
|
|
"scrolled": true
|
|
@@ -375,7 +392,7 @@
|
|
|
" 31: 746696}"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 104,
|
|
|
+ "execution_count": 147,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -386,7 +403,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 105,
|
|
|
+ "execution_count": 148,
|
|
|
"metadata": {
|
|
|
"collapsed": false
|
|
|
},
|
|
@@ -403,7 +420,7 @@
|
|
|
" 7: 4079723}"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 105,
|
|
|
+ "execution_count": 148,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|