Queer European MD passionate about IT
Browse Source

Deleted ipynb files

Davte 4 months ago
parent
commit
e426cab445
73 changed files with 1 additions and 63650 deletions
  1. 0 751
      Mission103Solutions.ipynb
  2. 0 156
      Mission146Solutions.ipynb
  3. 0 16
      Mission149Solutions.ipynb
  4. 0 1007
      Mission155Solutions.ipynb
  5. 0 1621
      Mission165Solutions.ipynb
  6. 0 698
      Mission167Solutions.ipynb
  7. 0 1006
      Mission177Solutions.ipynb
  8. 0 349
      Mission188Solution.ipynb
  9. 0 728
      Mission191Solutions.ipynb
  10. 0 5968
      Mission193Solutions.ipynb
  11. 0 1245
      Mission201Solution.ipynb
  12. 0 282
      Mission202Solution.ipynb
  13. 0 997
      Mission205Solutions.ipynb
  14. 0 88
      Mission207Solutions.ipynb
  15. 0 1929
      Mission209Solution.ipynb
  16. 0 814
      Mission210Solution.ipynb
  17. 0 230
      Mission211Solution.ipynb
  18. 0 188
      Mission213Solution.ipynb
  19. 0 158
      Mission215Solutions.ipynb
  20. 0 331
      Mission216Solutions.ipynb
  21. 0 310
      Mission217Solutions.ipynb
  22. 0 475
      Mission218Solution.ipynb
  23. 0 982
      Mission219Solution.ipynb
  24. 0 1066
      Mission227Solutions.ipynb
  25. 0 113
      Mission234Solutions.ipynb
  26. 0 1097
      Mission240Solutions.ipynb
  27. 0 169
      Mission244Solutions.ipynb
  28. 0 557
      Mission251Solution.ipynb
  29. 0 848
      Mission257Solutions.ipynb
  30. 0 124
      Mission267Solutions.ipynb
  31. 0 292
      Mission280Solutions.ipynb
  32. 0 1080
      Mission288Solutions.ipynb
  33. 0 2205
      Mission294Solutions.ipynb
  34. 0 219
      Mission304Solutions.ipynb
  35. 0 1374
      Mission310Solutions.ipynb
  36. 0 3160
      Mission348Solutions.ipynb
  37. 0 633
      Mission349Solutions.ipynb
  38. 0 1984
      Mission350Solutions.ipynb
  39. 0 496
      Mission356Solutions.ipynb
  40. 0 104
      Mission368Solutions.ipynb
  41. 0 717
      Mission382Solutions.ipynb
  42. 0 1311
      Mission433Solutions.ipynb
  43. 0 816
      Mission469Solutions.ipynb
  44. 0 630
      Mission481Solution.ipynb
  45. 0 640
      Mission481Solutions.ipynb
  46. 0 326
      Mission524Solutions.ipynb
  47. 0 839
      Mission529Solutions.ipynb
  48. 0 453
      Mission530Solutions.ipynb
  49. 0 58
      Mission559Solutions.ipynb
  50. 0 474
      Mission564Solutions.ipynb
  51. 0 58
      Mission569Solutions.ipynb
  52. 0 322
      Mission610Solutions.ipynb
  53. 0 570
      Mission612Solutions.ipynb
  54. 0 423
      Mission718Solutions.ipynb
  55. 0 132
      Mission730Solutions.ipynb
  56. 0 189
      Mission735Solutions.ipynb
  57. 0 638
      Mission740Solutions.ipynb
  58. 0 553
      Mission745Solutions.ipynb
  59. 0 704
      Mission750Solutions.ipynb
  60. 0 11643
      Mission755Solutions.ipynb
  61. 0 119
      Mission764Solutions.ipynb
  62. 0 2017
      Mission777Solutions.ipynb
  63. 0 217
      Mission784Solutions.ipynb
  64. 0 427
      Mission790Solutions.ipynb
  65. 0 550
      Mission797Solutions.ipynb
  66. 0 931
      Mission798Solutions.ipynb
  67. 0 442
      Mission804Solutions.ipynb
  68. 0 247
      Mission855Solutions.ipynb
  69. 0 513
      Mission882Solutions.ipynb
  70. 0 39
      Mission893Solutions.ipynb
  71. 0 346
      Mission909Solutions.ipynb
  72. 0 455
      Mission9Solutions.ipynb
  73. 1 1
      run_me.sh

File diff suppressed because it is too large
+ 0 - 751
Mission103Solutions.ipynb


File diff suppressed because it is too large
+ 0 - 156
Mission146Solutions.ipynb


File diff suppressed because it is too large
+ 0 - 16
Mission149Solutions.ipynb


File diff suppressed because it is too large
+ 0 - 1007
Mission155Solutions.ipynb


+ 0 - 1621
Mission165Solutions.ipynb

@@ -1,1621 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Introduction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>member_id</th>\n",
-       "      <th>loan_amnt</th>\n",
-       "      <th>funded_amnt</th>\n",
-       "      <th>funded_amnt_inv</th>\n",
-       "      <th>term</th>\n",
-       "      <th>int_rate</th>\n",
-       "      <th>installment</th>\n",
-       "      <th>grade</th>\n",
-       "      <th>sub_grade</th>\n",
-       "      <th>emp_title</th>\n",
-       "      <th>emp_length</th>\n",
-       "      <th>home_ownership</th>\n",
-       "      <th>annual_inc</th>\n",
-       "      <th>verification_status</th>\n",
-       "      <th>issue_d</th>\n",
-       "      <th>loan_status</th>\n",
-       "      <th>pymnt_plan</th>\n",
-       "      <th>purpose</th>\n",
-       "      <th>title</th>\n",
-       "      <th>zip_code</th>\n",
-       "      <th>addr_state</th>\n",
-       "      <th>dti</th>\n",
-       "      <th>delinq_2yrs</th>\n",
-       "      <th>earliest_cr_line</th>\n",
-       "      <th>inq_last_6mths</th>\n",
-       "      <th>open_acc</th>\n",
-       "      <th>pub_rec</th>\n",
-       "      <th>revol_bal</th>\n",
-       "      <th>revol_util</th>\n",
-       "      <th>total_acc</th>\n",
-       "      <th>initial_list_status</th>\n",
-       "      <th>out_prncp</th>\n",
-       "      <th>out_prncp_inv</th>\n",
-       "      <th>total_pymnt</th>\n",
-       "      <th>total_pymnt_inv</th>\n",
-       "      <th>total_rec_prncp</th>\n",
-       "      <th>total_rec_int</th>\n",
-       "      <th>total_rec_late_fee</th>\n",
-       "      <th>recoveries</th>\n",
-       "      <th>collection_recovery_fee</th>\n",
-       "      <th>last_pymnt_d</th>\n",
-       "      <th>last_pymnt_amnt</th>\n",
-       "      <th>last_credit_pull_d</th>\n",
-       "      <th>collections_12_mths_ex_med</th>\n",
-       "      <th>policy_code</th>\n",
-       "      <th>application_type</th>\n",
-       "      <th>acc_now_delinq</th>\n",
-       "      <th>chargeoff_within_12_mths</th>\n",
-       "      <th>delinq_amnt</th>\n",
-       "      <th>pub_rec_bankruptcies</th>\n",
-       "      <th>tax_liens</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1077501</td>\n",
-       "      <td>1296599.0</td>\n",
-       "      <td>5000.0</td>\n",
-       "      <td>5000.0</td>\n",
-       "      <td>4975.0</td>\n",
-       "      <td>36 months</td>\n",
-       "      <td>10.65%</td>\n",
-       "      <td>162.87</td>\n",
-       "      <td>B</td>\n",
-       "      <td>B2</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>10+ years</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>24000.0</td>\n",
-       "      <td>Verified</td>\n",
-       "      <td>Dec-2011</td>\n",
-       "      <td>Fully Paid</td>\n",
-       "      <td>n</td>\n",
-       "      <td>credit_card</td>\n",
-       "      <td>Computer</td>\n",
-       "      <td>860xx</td>\n",
-       "      <td>AZ</td>\n",
-       "      <td>27.65</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Jan-1985</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>13648.0</td>\n",
-       "      <td>83.7%</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>f</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>5863.155187</td>\n",
-       "      <td>5833.84</td>\n",
-       "      <td>5000.00</td>\n",
-       "      <td>863.16</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>Jan-2015</td>\n",
-       "      <td>171.62</td>\n",
-       "      <td>Jun-2016</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>INDIVIDUAL</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1077430</td>\n",
-       "      <td>1314167.0</td>\n",
-       "      <td>2500.0</td>\n",
-       "      <td>2500.0</td>\n",
-       "      <td>2500.0</td>\n",
-       "      <td>60 months</td>\n",
-       "      <td>15.27%</td>\n",
-       "      <td>59.83</td>\n",
-       "      <td>C</td>\n",
-       "      <td>C4</td>\n",
-       "      <td>Ryder</td>\n",
-       "      <td>&lt; 1 year</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>30000.0</td>\n",
-       "      <td>Source Verified</td>\n",
-       "      <td>Dec-2011</td>\n",
-       "      <td>Charged Off</td>\n",
-       "      <td>n</td>\n",
-       "      <td>car</td>\n",
-       "      <td>bike</td>\n",
-       "      <td>309xx</td>\n",
-       "      <td>GA</td>\n",
-       "      <td>1.00</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Apr-1999</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1687.0</td>\n",
-       "      <td>9.4%</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>f</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>1008.710000</td>\n",
-       "      <td>1008.71</td>\n",
-       "      <td>456.46</td>\n",
-       "      <td>435.17</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>117.08</td>\n",
-       "      <td>1.11</td>\n",
-       "      <td>Apr-2013</td>\n",
-       "      <td>119.66</td>\n",
-       "      <td>Sep-2013</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>INDIVIDUAL</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1077175</td>\n",
-       "      <td>1313524.0</td>\n",
-       "      <td>2400.0</td>\n",
-       "      <td>2400.0</td>\n",
-       "      <td>2400.0</td>\n",
-       "      <td>36 months</td>\n",
-       "      <td>15.96%</td>\n",
-       "      <td>84.33</td>\n",
-       "      <td>C</td>\n",
-       "      <td>C5</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>10+ years</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>12252.0</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Dec-2011</td>\n",
-       "      <td>Fully Paid</td>\n",
-       "      <td>n</td>\n",
-       "      <td>small_business</td>\n",
-       "      <td>real estate business</td>\n",
-       "      <td>606xx</td>\n",
-       "      <td>IL</td>\n",
-       "      <td>8.72</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Nov-2001</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2956.0</td>\n",
-       "      <td>98.5%</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>f</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>3005.666844</td>\n",
-       "      <td>3005.67</td>\n",
-       "      <td>2400.00</td>\n",
-       "      <td>605.67</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>Jun-2014</td>\n",
-       "      <td>649.91</td>\n",
-       "      <td>Jun-2016</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>INDIVIDUAL</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1076863</td>\n",
-       "      <td>1277178.0</td>\n",
-       "      <td>10000.0</td>\n",
-       "      <td>10000.0</td>\n",
-       "      <td>10000.0</td>\n",
-       "      <td>36 months</td>\n",
-       "      <td>13.49%</td>\n",
-       "      <td>339.31</td>\n",
-       "      <td>C</td>\n",
-       "      <td>C1</td>\n",
-       "      <td>AIR RESOURCES BOARD</td>\n",
-       "      <td>10+ years</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>49200.0</td>\n",
-       "      <td>Source Verified</td>\n",
-       "      <td>Dec-2011</td>\n",
-       "      <td>Fully Paid</td>\n",
-       "      <td>n</td>\n",
-       "      <td>other</td>\n",
-       "      <td>personel</td>\n",
-       "      <td>917xx</td>\n",
-       "      <td>CA</td>\n",
-       "      <td>20.00</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Feb-1996</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>5598.0</td>\n",
-       "      <td>21%</td>\n",
-       "      <td>37.0</td>\n",
-       "      <td>f</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>12231.890000</td>\n",
-       "      <td>12231.89</td>\n",
-       "      <td>10000.00</td>\n",
-       "      <td>2214.92</td>\n",
-       "      <td>16.97</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>Jan-2015</td>\n",
-       "      <td>357.48</td>\n",
-       "      <td>Apr-2016</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>INDIVIDUAL</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1075358</td>\n",
-       "      <td>1311748.0</td>\n",
-       "      <td>3000.0</td>\n",
-       "      <td>3000.0</td>\n",
-       "      <td>3000.0</td>\n",
-       "      <td>60 months</td>\n",
-       "      <td>12.69%</td>\n",
-       "      <td>67.79</td>\n",
-       "      <td>B</td>\n",
-       "      <td>B5</td>\n",
-       "      <td>University Medical Group</td>\n",
-       "      <td>1 year</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>80000.0</td>\n",
-       "      <td>Source Verified</td>\n",
-       "      <td>Dec-2011</td>\n",
-       "      <td>Current</td>\n",
-       "      <td>n</td>\n",
-       "      <td>other</td>\n",
-       "      <td>Personal</td>\n",
-       "      <td>972xx</td>\n",
-       "      <td>OR</td>\n",
-       "      <td>17.94</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Jan-1996</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>15.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>27783.0</td>\n",
-       "      <td>53.9%</td>\n",
-       "      <td>38.0</td>\n",
-       "      <td>f</td>\n",
-       "      <td>461.73</td>\n",
-       "      <td>461.73</td>\n",
-       "      <td>3581.120000</td>\n",
-       "      <td>3581.12</td>\n",
-       "      <td>2538.27</td>\n",
-       "      <td>1042.85</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>Jun-2016</td>\n",
-       "      <td>67.79</td>\n",
-       "      <td>Jun-2016</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>INDIVIDUAL</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        id  member_id  loan_amnt  funded_amnt  funded_amnt_inv        term  \\\n",
-       "0  1077501  1296599.0     5000.0       5000.0           4975.0   36 months   \n",
-       "1  1077430  1314167.0     2500.0       2500.0           2500.0   60 months   \n",
-       "2  1077175  1313524.0     2400.0       2400.0           2400.0   36 months   \n",
-       "3  1076863  1277178.0    10000.0      10000.0          10000.0   36 months   \n",
-       "4  1075358  1311748.0     3000.0       3000.0           3000.0   60 months   \n",
-       "\n",
-       "  int_rate  installment grade sub_grade                 emp_title emp_length  \\\n",
-       "0   10.65%       162.87     B        B2                       NaN  10+ years   \n",
-       "1   15.27%        59.83     C        C4                     Ryder   < 1 year   \n",
-       "2   15.96%        84.33     C        C5                       NaN  10+ years   \n",
-       "3   13.49%       339.31     C        C1       AIR RESOURCES BOARD  10+ years   \n",
-       "4   12.69%        67.79     B        B5  University Medical Group     1 year   \n",
-       "\n",
-       "  home_ownership  annual_inc verification_status   issue_d  loan_status  \\\n",
-       "0           RENT     24000.0            Verified  Dec-2011   Fully Paid   \n",
-       "1           RENT     30000.0     Source Verified  Dec-2011  Charged Off   \n",
-       "2           RENT     12252.0        Not Verified  Dec-2011   Fully Paid   \n",
-       "3           RENT     49200.0     Source Verified  Dec-2011   Fully Paid   \n",
-       "4           RENT     80000.0     Source Verified  Dec-2011      Current   \n",
-       "\n",
-       "  pymnt_plan         purpose                 title zip_code addr_state    dti  \\\n",
-       "0          n     credit_card              Computer    860xx         AZ  27.65   \n",
-       "1          n             car                  bike    309xx         GA   1.00   \n",
-       "2          n  small_business  real estate business    606xx         IL   8.72   \n",
-       "3          n           other              personel    917xx         CA  20.00   \n",
-       "4          n           other              Personal    972xx         OR  17.94   \n",
-       "\n",
-       "   delinq_2yrs earliest_cr_line  inq_last_6mths  open_acc  pub_rec  revol_bal  \\\n",
-       "0          0.0         Jan-1985             1.0       3.0      0.0    13648.0   \n",
-       "1          0.0         Apr-1999             5.0       3.0      0.0     1687.0   \n",
-       "2          0.0         Nov-2001             2.0       2.0      0.0     2956.0   \n",
-       "3          0.0         Feb-1996             1.0      10.0      0.0     5598.0   \n",
-       "4          0.0         Jan-1996             0.0      15.0      0.0    27783.0   \n",
-       "\n",
-       "  revol_util  total_acc initial_list_status  out_prncp  out_prncp_inv  \\\n",
-       "0      83.7%        9.0                   f       0.00           0.00   \n",
-       "1       9.4%        4.0                   f       0.00           0.00   \n",
-       "2      98.5%       10.0                   f       0.00           0.00   \n",
-       "3        21%       37.0                   f       0.00           0.00   \n",
-       "4      53.9%       38.0                   f     461.73         461.73   \n",
-       "\n",
-       "    total_pymnt  total_pymnt_inv  total_rec_prncp  total_rec_int  \\\n",
-       "0   5863.155187          5833.84          5000.00         863.16   \n",
-       "1   1008.710000          1008.71           456.46         435.17   \n",
-       "2   3005.666844          3005.67          2400.00         605.67   \n",
-       "3  12231.890000         12231.89         10000.00        2214.92   \n",
-       "4   3581.120000          3581.12          2538.27        1042.85   \n",
-       "\n",
-       "   total_rec_late_fee  recoveries  collection_recovery_fee last_pymnt_d  \\\n",
-       "0                0.00        0.00                     0.00     Jan-2015   \n",
-       "1                0.00      117.08                     1.11     Apr-2013   \n",
-       "2                0.00        0.00                     0.00     Jun-2014   \n",
-       "3               16.97        0.00                     0.00     Jan-2015   \n",
-       "4                0.00        0.00                     0.00     Jun-2016   \n",
-       "\n",
-       "   last_pymnt_amnt last_credit_pull_d  collections_12_mths_ex_med  \\\n",
-       "0           171.62           Jun-2016                         0.0   \n",
-       "1           119.66           Sep-2013                         0.0   \n",
-       "2           649.91           Jun-2016                         0.0   \n",
-       "3           357.48           Apr-2016                         0.0   \n",
-       "4            67.79           Jun-2016                         0.0   \n",
-       "\n",
-       "   policy_code application_type  acc_now_delinq  chargeoff_within_12_mths  \\\n",
-       "0          1.0       INDIVIDUAL             0.0                       0.0   \n",
-       "1          1.0       INDIVIDUAL             0.0                       0.0   \n",
-       "2          1.0       INDIVIDUAL             0.0                       0.0   \n",
-       "3          1.0       INDIVIDUAL             0.0                       0.0   \n",
-       "4          1.0       INDIVIDUAL             0.0                       0.0   \n",
-       "\n",
-       "   delinq_amnt  pub_rec_bankruptcies  tax_liens  \n",
-       "0          0.0                   0.0        0.0  \n",
-       "1          0.0                   0.0        0.0  \n",
-       "2          0.0                   0.0        0.0  \n",
-       "3          0.0                   0.0        0.0  \n",
-       "4          0.0                   0.0        0.0  "
-      ]
-     },
-     "execution_count": 1,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "pd.options.display.max_columns = 99\n",
-    "\n",
-    "first_five = pd.read_csv('loans_2007.csv', nrows=5)\n",
-    "first_five"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1.5502548217773438"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "thousand_chunk = pd.read_csv('loans_2007.csv', nrows=1000)\n",
-    "thousand_chunk.memory_usage(deep=True).sum()/(1024*1024)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Let's try tripling to 3000 rows and calculate the memory footprint for each chunk."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "4.649059295654297\n",
-      "4.644805908203125\n",
-      "4.646563529968262\n",
-      "4.647915840148926\n",
-      "4.644108772277832\n",
-      "4.645991325378418\n",
-      "4.644582748413086\n",
-      "4.646951675415039\n",
-      "4.645077705383301\n",
-      "4.64512825012207\n",
-      "4.657840728759766\n",
-      "4.656707763671875\n",
-      "4.663515090942383\n",
-      "4.896956443786621\n",
-      "0.880854606628418\n"
-     ]
-    }
-   ],
-   "source": [
-    "chunk_iter = pd.read_csv('loans_2007.csv', chunksize=3000)\n",
-    "for chunk in chunk_iter:\n",
-    "    print(chunk.memory_usage(deep=True).sum()/(1024*1024))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## How many rows are in the dataset?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "42538\n"
-     ]
-    }
-   ],
-   "source": [
-    "chunk_iter = pd.read_csv('loans_2007.csv', chunksize=3000)\n",
-    "total_rows = 0\n",
-    "for chunk in chunk_iter:\n",
-    "    total_rows += len(chunk)\n",
-    "print(total_rows)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Exploring the Data in Chunks\n",
-    "\n",
-    "## How many columns have a numeric type? How many have a string type?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30]\n",
-      "[21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Numeric columns\n",
-    "loans_chunks = pd.read_csv('loans_2007.csv',chunksize=3000)\n",
-    "\n",
-    "numeric = []\n",
-    "string = []\n",
-    "for lc in loans_chunks:\n",
-    "    nums = lc.select_dtypes(include=[np.number]).shape[1]\n",
-    "    numeric.append(nums)\n",
-    "    strs = lc.select_dtypes(include=['object']).shape[1]\n",
-    "    string.append(strs)\n",
-    "\n",
-    "print(numeric)\n",
-    "print(string)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "overall obj cols: ['term', 'int_rate', 'grade', 'sub_grade', 'emp_title', 'emp_length', 'home_ownership', 'verification_status', 'issue_d', 'loan_status', 'pymnt_plan', 'purpose', 'title', 'zip_code', 'addr_state', 'earliest_cr_line', 'revol_util', 'initial_list_status', 'last_pymnt_d', 'last_credit_pull_d', 'application_type'] \n",
-      "\n",
-      "chunk obj cols: ['id', 'term', 'int_rate', 'grade', 'sub_grade', 'emp_title', 'emp_length', 'home_ownership', 'verification_status', 'issue_d', 'loan_status', 'pymnt_plan', 'purpose', 'title', 'zip_code', 'addr_state', 'earliest_cr_line', 'revol_util', 'initial_list_status', 'last_pymnt_d', 'last_credit_pull_d', 'application_type'] \n",
-      "\n",
-      "overall obj cols: ['term', 'int_rate', 'grade', 'sub_grade', 'emp_title', 'emp_length', 'home_ownership', 'verification_status', 'issue_d', 'loan_status', 'pymnt_plan', 'purpose', 'title', 'zip_code', 'addr_state', 'earliest_cr_line', 'revol_util', 'initial_list_status', 'last_pymnt_d', 'last_credit_pull_d', 'application_type'] \n",
-      "\n",
-      "chunk obj cols: ['id', 'term', 'int_rate', 'grade', 'sub_grade', 'emp_title', 'emp_length', 'home_ownership', 'verification_status', 'issue_d', 'loan_status', 'pymnt_plan', 'purpose', 'title', 'zip_code', 'addr_state', 'earliest_cr_line', 'revol_util', 'initial_list_status', 'last_pymnt_d', 'last_credit_pull_d', 'application_type'] \n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Are string columns consistent across chunks?\n",
-    "obj_cols = []\n",
-    "chunk_iter = pd.read_csv('loans_2007.csv', chunksize=3000)\n",
-    "\n",
-    "for chunk in chunk_iter:\n",
-    "    chunk_obj_cols = chunk.select_dtypes(include=['object']).columns.tolist()\n",
-    "    if len(obj_cols) > 0:\n",
-    "        is_same = obj_cols == chunk_obj_cols\n",
-    "        if not is_same:\n",
-    "            print(\"overall obj cols:\", obj_cols, \"\\n\")\n",
-    "            print(\"chunk obj cols:\", chunk_obj_cols, \"\\n\")    \n",
-    "    else:\n",
-    "        obj_cols = chunk_obj_cols"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "collapsed": true
-   },
-   "source": [
-    "### Observation 1: By default — 31 numeric columns and 21 string columns.\n",
-    "\n",
-    "### Observation 2: It seems like one column in particular (the `id` column) is being cast to int64 in the last 2 chunks but not in the earlier chunks. Since the `id` column won't be useful for analysis, visualization, or predictive modeling, let's ignore this column.\n",
-    "\n",
-    "## How many unique values are there in each string column? How many of the string columns contain values that are less than 50% unique?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "term 2\n",
-      "grade 7\n",
-      "sub_grade 35\n",
-      "emp_length 11\n",
-      "home_ownership 5\n",
-      "verification_status 3\n",
-      "loan_status 9\n",
-      "pymnt_plan 2\n",
-      "purpose 14\n",
-      "initial_list_status 1\n",
-      "application_type 1\n"
-     ]
-    }
-   ],
-   "source": [
-    "loans_chunks = pd.read_csv('loans_2007.csv',chunksize=3000)\n",
-    "\n",
-    "uniques = {}\n",
-    "for lc in loans_chunks:\n",
-    "    strings_only = lc.select_dtypes(include=['object'])\n",
-    "    cols = strings_only.columns\n",
-    "    for c in cols:\n",
-    "        val_counts = strings_only[c].value_counts()\n",
-    "        if c in uniques:\n",
-    "            uniques[c].append(val_counts)\n",
-    "        else:\n",
-    "            uniques[c] = [val_counts]\n",
-    "\n",
-    "uniques_combined = {}\n",
-    "unique_stats = {\n",
-    "    'column_name': [],\n",
-    "    'total_values': [],\n",
-    "    'unique_values': [],\n",
-    "}\n",
-    "for col in uniques:\n",
-    "    u_concat = pd.concat(uniques[col])\n",
-    "    u_group = u_concat.groupby(u_concat.index).sum()\n",
-    "    uniques_combined[col] = u_group\n",
-    "    if u_group.shape[0] < 50:\n",
-    "        print(col, u_group.shape[0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Which float columns have no missing values and could be candidates for conversion to the integer type?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "member_id                        3\n",
-       "total_rec_int                    3\n",
-       "total_pymnt_inv                  3\n",
-       "total_pymnt                      3\n",
-       "revol_bal                        3\n",
-       "recoveries                       3\n",
-       "policy_code                      3\n",
-       "out_prncp_inv                    3\n",
-       "out_prncp                        3\n",
-       "total_rec_late_fee               3\n",
-       "loan_amnt                        3\n",
-       "last_pymnt_amnt                  3\n",
-       "total_rec_prncp                  3\n",
-       "funded_amnt_inv                  3\n",
-       "funded_amnt                      3\n",
-       "dti                              3\n",
-       "collection_recovery_fee          3\n",
-       "installment                      3\n",
-       "annual_inc                       7\n",
-       "inq_last_6mths                  32\n",
-       "total_acc                       32\n",
-       "delinq_2yrs                     32\n",
-       "pub_rec                         32\n",
-       "delinq_amnt                     32\n",
-       "open_acc                        32\n",
-       "acc_now_delinq                  32\n",
-       "tax_liens                      108\n",
-       "collections_12_mths_ex_med     148\n",
-       "chargeoff_within_12_mths       148\n",
-       "pub_rec_bankruptcies          1368\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "loans_chunks = pd.read_csv('loans_2007.csv',chunksize=3000)\n",
-    "\n",
-    "missing = []\n",
-    "for lc in loans_chunks:\n",
-    "    floats = lc.select_dtypes(include=['float'])\n",
-    "    missing.append(floats.apply(pd.isnull).sum())\n",
-    "\n",
-    "combined_missing = pd.concat(missing)\n",
-    "combined_missing.groupby(combined_missing.index).sum().sort_values()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Calculate the total memory usage across all of the chunks."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "66.21605968475342"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "loans_chunks = pd.read_csv('loans_2007.csv',chunksize=3000)\n",
-    "\n",
-    "mem_usage = []\n",
-    "\n",
-    "for lc in loans_chunks:\n",
-    "    mem_usage.append(lc.memory_usage(deep=True).sum() / 1024 ** 2)\n",
-    "\n",
-    "sum(mem_usage)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Optimizing String Columns\n",
-    "\n",
-    "### Determine which string columns you can convert to a numeric type if you clean them. Let's focus on columns that would actually be useful for analysis and modeling."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['term',\n",
-       " 'int_rate',\n",
-       " 'grade',\n",
-       " 'sub_grade',\n",
-       " 'emp_title',\n",
-       " 'emp_length',\n",
-       " 'home_ownership',\n",
-       " 'verification_status',\n",
-       " 'issue_d',\n",
-       " 'loan_status',\n",
-       " 'pymnt_plan',\n",
-       " 'purpose',\n",
-       " 'title',\n",
-       " 'zip_code',\n",
-       " 'addr_state',\n",
-       " 'earliest_cr_line',\n",
-       " 'revol_util',\n",
-       " 'initial_list_status',\n",
-       " 'last_pymnt_d',\n",
-       " 'last_credit_pull_d',\n",
-       " 'application_type']"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "obj_cols"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "useful_obj_cols = ['term', 'sub_grade', 'emp_title', 'home_ownership', 'verification_status', 'issue_d', 'purpose', 'earliest_cr_line', 'revol_util', 'last_pymnt_d', 'last_credit_pull_d']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "## Create dictionary (key: column, value: list of Series objects representing each chunk's value counts)\n",
-    "chunk_iter = pd.read_csv('loans_2007.csv', chunksize=3000)\n",
-    "str_cols_vc = {}\n",
-    "for chunk in chunk_iter:\n",
-    "    str_cols = chunk.select_dtypes(include=['object'])\n",
-    "    for col in str_cols.columns:\n",
-    "        current_col_vc = str_cols[col].value_counts()\n",
-    "        if col in str_cols_vc:\n",
-    "            str_cols_vc[col].append(current_col_vc)\n",
-    "        else:\n",
-    "            str_cols_vc[col] = [current_col_vc]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "## Combine the value counts.\n",
-    "combined_vcs = {}\n",
-    "\n",
-    "for col in str_cols_vc:\n",
-    "    combined_vc = pd.concat(str_cols_vc[col])\n",
-    "    final_vc = combined_vc.groupby(combined_vc.index).sum()\n",
-    "    combined_vcs[col] = final_vc"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "term\n",
-      " 36 months    31534\n",
-      " 60 months    11001\n",
-      "Name: term, dtype: int64\n",
-      "-----------\n",
-      "sub_grade\n",
-      "A1    1142\n",
-      "A2    1520\n",
-      "A3    1823\n",
-      "A4    2905\n",
-      "A5    2793\n",
-      "B1    1882\n",
-      "B2    2113\n",
-      "B3    2997\n",
-      "B4    2590\n",
-      "B5    2807\n",
-      "C1    2264\n",
-      "C2    2157\n",
-      "C3    1658\n",
-      "C4    1370\n",
-      "C5    1291\n",
-      "D1    1053\n",
-      "D2    1485\n",
-      "D3    1322\n",
-      "D4    1140\n",
-      "D5    1016\n",
-      "E1     884\n",
-      "E2     791\n",
-      "E3     668\n",
-      "E4     552\n",
-      "E5     499\n",
-      "F1     392\n",
-      "F2     308\n",
-      "F3     236\n",
-      "F4     211\n",
-      "F5     154\n",
-      "G1     141\n",
-      "G2     107\n",
-      "G3      79\n",
-      "G4      99\n",
-      "G5      86\n",
-      "Name: sub_grade, dtype: int64\n",
-      "-----------\n",
-      "emp_title\n",
-      "  old palm inc                       1\n",
-      " Brocade Communications              1\n",
-      " CenturyLink                         1\n",
-      " Department of Homeland Security     1\n",
-      " Down To Earth Distributors, Inc.    1\n",
-      "                                    ..\n",
-      "zashko inc.                          1\n",
-      "zeno office solutions                1\n",
-      "zion lutheran school                 1\n",
-      "zoll medical corp                    1\n",
-      "zozaya officiating                   1\n",
-      "Name: emp_title, Length: 30658, dtype: int64\n",
-      "-----------\n",
-      "home_ownership\n",
-      "MORTGAGE    18959\n",
-      "NONE            8\n",
-      "OTHER         136\n",
-      "OWN          3251\n",
-      "RENT        20181\n",
-      "Name: home_ownership, dtype: int64\n",
-      "-----------\n",
-      "verification_status\n",
-      "Not Verified       18758\n",
-      "Source Verified    10306\n",
-      "Verified           13471\n",
-      "Name: verification_status, dtype: int64\n",
-      "-----------\n",
-      "issue_d\n",
-      "Apr-2008     259\n",
-      "Apr-2009     333\n",
-      "Apr-2010     912\n",
-      "Apr-2011    1563\n",
-      "Aug-2007      74\n",
-      "Aug-2008     100\n",
-      "Aug-2009     446\n",
-      "Aug-2010    1175\n",
-      "Aug-2011    1934\n",
-      "Dec-2007     172\n",
-      "Dec-2008     253\n",
-      "Dec-2009     658\n",
-      "Dec-2010    1335\n",
-      "Dec-2011    2267\n",
-      "Feb-2008     306\n",
-      "Feb-2009     302\n",
-      "Feb-2010     682\n",
-      "Feb-2011    1298\n",
-      "Jan-2008     305\n",
-      "Jan-2009     269\n",
-      "Jan-2010     662\n",
-      "Jan-2011    1380\n",
-      "Jul-2007      63\n",
-      "Jul-2008     141\n",
-      "Jul-2009     411\n",
-      "Jul-2010    1204\n",
-      "Jul-2011    1875\n",
-      "Jun-2007      24\n",
-      "Jun-2008     124\n",
-      "Jun-2009     406\n",
-      "Jun-2010    1105\n",
-      "Jun-2011    1835\n",
-      "Mar-2008     402\n",
-      "Mar-2009     324\n",
-      "Mar-2010     828\n",
-      "Mar-2011    1448\n",
-      "May-2008     115\n",
-      "May-2009     359\n",
-      "May-2010     989\n",
-      "May-2011    1704\n",
-      "Nov-2007     112\n",
-      "Nov-2008     209\n",
-      "Nov-2009     662\n",
-      "Nov-2010    1224\n",
-      "Nov-2011    2232\n",
-      "Oct-2007     105\n",
-      "Oct-2008     122\n",
-      "Oct-2009     604\n",
-      "Oct-2010    1232\n",
-      "Oct-2011    2118\n",
-      "Sep-2007      53\n",
-      "Sep-2008      57\n",
-      "Sep-2009     507\n",
-      "Sep-2010    1189\n",
-      "Sep-2011    2067\n",
-      "Name: issue_d, dtype: int64\n",
-      "-----------\n",
-      "purpose\n",
-      "car                    1615\n",
-      "credit_card            5477\n",
-      "debt_consolidation    19776\n",
-      "educational             422\n",
-      "home_improvement       3199\n",
-      "house                   426\n",
-      "major_purchase         2311\n",
-      "medical                 753\n",
-      "moving                  629\n",
-      "other                  4425\n",
-      "renewable_energy        106\n",
-      "small_business         1992\n",
-      "vacation                400\n",
-      "wedding                1004\n",
-      "Name: purpose, dtype: int64\n",
-      "-----------\n",
-      "earliest_cr_line\n",
-      "Apr-1964      3\n",
-      "Apr-1966      1\n",
-      "Apr-1967      4\n",
-      "Apr-1968      1\n",
-      "Apr-1969      1\n",
-      "           ... \n",
-      "Sep-2004    221\n",
-      "Sep-2005    162\n",
-      "Sep-2006    150\n",
-      "Sep-2007     63\n",
-      "Sep-2008      8\n",
-      "Name: earliest_cr_line, Length: 530, dtype: int64\n",
-      "-----------\n",
-      "revol_util\n",
-      "0%       1070\n",
-      "0.01%       1\n",
-      "0.03%       1\n",
-      "0.04%       1\n",
-      "0.05%       1\n",
-      "         ... \n",
-      "99.5%      24\n",
-      "99.6%      27\n",
-      "99.7%      32\n",
-      "99.8%      25\n",
-      "99.9%      29\n",
-      "Name: revol_util, Length: 1119, dtype: int64\n",
-      "-----------\n",
-      "last_pymnt_d\n",
-      "Apr-2008     23\n",
-      "Apr-2009     72\n",
-      "Apr-2010    145\n",
-      "Apr-2011    519\n",
-      "Apr-2012    781\n",
-      "           ... \n",
-      "Sep-2011    491\n",
-      "Sep-2012    802\n",
-      "Sep-2013    712\n",
-      "Sep-2014    694\n",
-      "Sep-2015    211\n",
-      "Name: last_pymnt_d, Length: 103, dtype: int64\n",
-      "-----------\n",
-      "last_credit_pull_d\n",
-      "Apr-2009     24\n",
-      "Apr-2010     77\n",
-      "Apr-2011    177\n",
-      "Apr-2012    326\n",
-      "Apr-2013    445\n",
-      "           ... \n",
-      "Sep-2011    175\n",
-      "Sep-2012    414\n",
-      "Sep-2013    408\n",
-      "Sep-2014    564\n",
-      "Sep-2015    531\n",
-      "Name: last_credit_pull_d, Length: 108, dtype: int64\n",
-      "-----------\n"
-     ]
-    }
-   ],
-   "source": [
-    "for col in useful_obj_cols:\n",
-    "    print(col)\n",
-    "    print(combined_vcs[col])\n",
-    "    print(\"-----------\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Convert to category."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "convert_col_dtypes = {\n",
-    "    \"sub_grade\": \"category\", \"home_ownership\": \"category\", \n",
-    "    \"verification_status\": \"category\", \"purpose\": \"category\"\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Convert `term` and `revol_util` to numerical by data cleaning.\n",
-    "### Convert `issue_d`, `earliest_cr_line`, `last_pymnt_d`, and `last_credit_pull_d` to datetime."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>term</th>\n",
-       "      <th>sub_grade</th>\n",
-       "      <th>emp_title</th>\n",
-       "      <th>home_ownership</th>\n",
-       "      <th>verification_status</th>\n",
-       "      <th>issue_d</th>\n",
-       "      <th>purpose</th>\n",
-       "      <th>earliest_cr_line</th>\n",
-       "      <th>revol_util</th>\n",
-       "      <th>last_pymnt_d</th>\n",
-       "      <th>last_credit_pull_d</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>42000</th>\n",
-       "      <td>36 months</td>\n",
-       "      <td>C2</td>\n",
-       "      <td>Best Buy</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Feb-2008</td>\n",
-       "      <td>debt_consolidation</td>\n",
-       "      <td>Jul-2000</td>\n",
-       "      <td>100.7%</td>\n",
-       "      <td>Feb-2011</td>\n",
-       "      <td>Jun-2016</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42001</th>\n",
-       "      <td>36 months</td>\n",
-       "      <td>G2</td>\n",
-       "      <td>CVS PHARMACY</td>\n",
-       "      <td>OWN</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Feb-2008</td>\n",
-       "      <td>debt_consolidation</td>\n",
-       "      <td>Mar-1989</td>\n",
-       "      <td>51.9%</td>\n",
-       "      <td>Nov-2008</td>\n",
-       "      <td>Jun-2016</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42002</th>\n",
-       "      <td>36 months</td>\n",
-       "      <td>E4</td>\n",
-       "      <td>General Motors</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Feb-2008</td>\n",
-       "      <td>debt_consolidation</td>\n",
-       "      <td>Dec-1998</td>\n",
-       "      <td>80.7%</td>\n",
-       "      <td>Feb-2011</td>\n",
-       "      <td>Jun-2016</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42003</th>\n",
-       "      <td>36 months</td>\n",
-       "      <td>G4</td>\n",
-       "      <td>usa medical center</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Feb-2008</td>\n",
-       "      <td>debt_consolidation</td>\n",
-       "      <td>Jul-1995</td>\n",
-       "      <td>57.2%</td>\n",
-       "      <td>Feb-2011</td>\n",
-       "      <td>Jun-2011</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42004</th>\n",
-       "      <td>36 months</td>\n",
-       "      <td>B3</td>\n",
-       "      <td>InvestSource Inc</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Feb-2008</td>\n",
-       "      <td>debt_consolidation</td>\n",
-       "      <td>Sep-2005</td>\n",
-       "      <td>74%</td>\n",
-       "      <td>Mar-2010</td>\n",
-       "      <td>Aug-2010</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42533</th>\n",
-       "      <td>36 months</td>\n",
-       "      <td>B3</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>RENT</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Jun-2007</td>\n",
-       "      <td>other</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Jun-2010</td>\n",
-       "      <td>May-2007</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42534</th>\n",
-       "      <td>36 months</td>\n",
-       "      <td>A5</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NONE</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Jun-2007</td>\n",
-       "      <td>other</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Jun-2010</td>\n",
-       "      <td>Aug-2007</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42535</th>\n",
-       "      <td>36 months</td>\n",
-       "      <td>A3</td>\n",
-       "      <td>Homemaker</td>\n",
-       "      <td>MORTGAGE</td>\n",
-       "      <td>Not Verified</td>\n",
-       "      <td>Jun-2007</td>\n",
-       "      <td>other</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Jun-2010</td>\n",
-       "      <td>Feb-2015</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42536</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>42537</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>538 rows × 11 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "             term sub_grade           emp_title home_ownership  \\\n",
-       "42000   36 months        C2            Best Buy           RENT   \n",
-       "42001   36 months        G2        CVS PHARMACY            OWN   \n",
-       "42002   36 months        E4      General Motors           RENT   \n",
-       "42003   36 months        G4  usa medical center           RENT   \n",
-       "42004   36 months        B3    InvestSource Inc           RENT   \n",
-       "...           ...       ...                 ...            ...   \n",
-       "42533   36 months        B3                 NaN           RENT   \n",
-       "42534   36 months        A5                 NaN           NONE   \n",
-       "42535   36 months        A3           Homemaker       MORTGAGE   \n",
-       "42536         NaN       NaN                 NaN            NaN   \n",
-       "42537         NaN       NaN                 NaN            NaN   \n",
-       "\n",
-       "      verification_status   issue_d             purpose earliest_cr_line  \\\n",
-       "42000        Not Verified  Feb-2008  debt_consolidation         Jul-2000   \n",
-       "42001        Not Verified  Feb-2008  debt_consolidation         Mar-1989   \n",
-       "42002        Not Verified  Feb-2008  debt_consolidation         Dec-1998   \n",
-       "42003        Not Verified  Feb-2008  debt_consolidation         Jul-1995   \n",
-       "42004        Not Verified  Feb-2008  debt_consolidation         Sep-2005   \n",
-       "...                   ...       ...                 ...              ...   \n",
-       "42533        Not Verified  Jun-2007               other              NaN   \n",
-       "42534        Not Verified  Jun-2007               other              NaN   \n",
-       "42535        Not Verified  Jun-2007               other              NaN   \n",
-       "42536                 NaN       NaN                 NaN              NaN   \n",
-       "42537                 NaN       NaN                 NaN              NaN   \n",
-       "\n",
-       "      revol_util last_pymnt_d last_credit_pull_d  \n",
-       "42000     100.7%     Feb-2011           Jun-2016  \n",
-       "42001      51.9%     Nov-2008           Jun-2016  \n",
-       "42002      80.7%     Feb-2011           Jun-2016  \n",
-       "42003      57.2%     Feb-2011           Jun-2011  \n",
-       "42004        74%     Mar-2010           Aug-2010  \n",
-       "...          ...          ...                ...  \n",
-       "42533        NaN     Jun-2010           May-2007  \n",
-       "42534        NaN     Jun-2010           Aug-2007  \n",
-       "42535        NaN     Jun-2010           Feb-2015  \n",
-       "42536        NaN          NaN                NaN  \n",
-       "42537        NaN          NaN                NaN  \n",
-       "\n",
-       "[538 rows x 11 columns]"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "chunk[useful_obj_cols]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "id                                    object\n",
-       "member_id                            float64\n",
-       "loan_amnt                            float64\n",
-       "funded_amnt                          float64\n",
-       "funded_amnt_inv                      float64\n",
-       "term                                 float64\n",
-       "int_rate                              object\n",
-       "installment                          float64\n",
-       "grade                                 object\n",
-       "sub_grade                           category\n",
-       "emp_title                             object\n",
-       "emp_length                            object\n",
-       "home_ownership                      category\n",
-       "annual_inc                           float64\n",
-       "verification_status                 category\n",
-       "issue_d                       datetime64[ns]\n",
-       "loan_status                           object\n",
-       "pymnt_plan                            object\n",
-       "purpose                             category\n",
-       "title                                 object\n",
-       "zip_code                              object\n",
-       "addr_state                            object\n",
-       "dti                                  float64\n",
-       "delinq_2yrs                          float64\n",
-       "earliest_cr_line              datetime64[ns]\n",
-       "inq_last_6mths                       float64\n",
-       "open_acc                             float64\n",
-       "pub_rec                              float64\n",
-       "revol_bal                            float64\n",
-       "revol_util                           float64\n",
-       "total_acc                            float64\n",
-       "initial_list_status                   object\n",
-       "out_prncp                            float64\n",
-       "out_prncp_inv                        float64\n",
-       "total_pymnt                          float64\n",
-       "total_pymnt_inv                      float64\n",
-       "total_rec_prncp                      float64\n",
-       "total_rec_int                        float64\n",
-       "total_rec_late_fee                   float64\n",
-       "recoveries                           float64\n",
-       "collection_recovery_fee              float64\n",
-       "last_pymnt_d                  datetime64[ns]\n",
-       "last_pymnt_amnt                      float64\n",
-       "last_credit_pull_d            datetime64[ns]\n",
-       "collections_12_mths_ex_med           float64\n",
-       "policy_code                          float64\n",
-       "application_type                      object\n",
-       "acc_now_delinq                       float64\n",
-       "chargeoff_within_12_mths             float64\n",
-       "delinq_amnt                          float64\n",
-       "pub_rec_bankruptcies                 float64\n",
-       "tax_liens                            float64\n",
-       "dtype: object"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "chunk_iter = pd.read_csv('loans_2007.csv', chunksize=3000, dtype=convert_col_dtypes, parse_dates=[\"issue_d\", \"earliest_cr_line\", \"last_pymnt_d\", \"last_credit_pull_d\"])\n",
-    "\n",
-    "for chunk in chunk_iter:\n",
-    "    term_cleaned = chunk['term'].str.lstrip(\" \").str.rstrip(\" months\")\n",
-    "    revol_cleaned = chunk['revol_util'].str.rstrip(\"%\")\n",
-    "    chunk['term'] = pd.to_numeric(term_cleaned)\n",
-    "    chunk['revol_util'] = pd.to_numeric(revol_cleaned)\n",
-    "    \n",
-    "chunk.dtypes"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'member_id': 3,\n",
-       " 'loan_amnt': 3,\n",
-       " 'funded_amnt': 3,\n",
-       " 'funded_amnt_inv': 3,\n",
-       " 'installment': 3,\n",
-       " 'annual_inc': 7,\n",
-       " 'dti': 3,\n",
-       " 'delinq_2yrs': 32,\n",
-       " 'inq_last_6mths': 32,\n",
-       " 'open_acc': 32,\n",
-       " 'pub_rec': 32,\n",
-       " 'revol_bal': 3,\n",
-       " 'revol_util': 93,\n",
-       " 'total_acc': 32,\n",
-       " 'out_prncp': 3,\n",
-       " 'out_prncp_inv': 3,\n",
-       " 'total_pymnt': 3,\n",
-       " 'total_pymnt_inv': 3,\n",
-       " 'total_rec_prncp': 3,\n",
-       " 'total_rec_int': 3,\n",
-       " 'total_rec_late_fee': 3,\n",
-       " 'recoveries': 3,\n",
-       " 'collection_recovery_fee': 3,\n",
-       " 'last_pymnt_amnt': 3,\n",
-       " 'collections_12_mths_ex_med': 148,\n",
-       " 'policy_code': 3,\n",
-       " 'acc_now_delinq': 32,\n",
-       " 'chargeoff_within_12_mths': 148,\n",
-       " 'delinq_amnt': 32,\n",
-       " 'pub_rec_bankruptcies': 1368,\n",
-       " 'tax_liens': 108,\n",
-       " 'term': 3}"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "chunk_iter = pd.read_csv('loans_2007.csv', chunksize=3000, dtype=convert_col_dtypes, parse_dates=[\"issue_d\", \"earliest_cr_line\", \"last_pymnt_d\", \"last_credit_pull_d\"])\n",
-    "mv_counts = {}\n",
-    "for chunk in chunk_iter:\n",
-    "    term_cleaned = chunk['term'].str.lstrip(\" \").str.rstrip(\" months\")\n",
-    "    revol_cleaned = chunk['revol_util'].str.rstrip(\"%\")\n",
-    "    chunk['term'] = pd.to_numeric(term_cleaned)\n",
-    "    chunk['revol_util'] = pd.to_numeric(revol_cleaned)\n",
-    "    float_cols = chunk.select_dtypes(include=['float'])\n",
-    "    for col in float_cols.columns:\n",
-    "        missing_values = len(chunk) - chunk[col].count()\n",
-    "        if col in mv_counts:\n",
-    "            mv_counts[col] = mv_counts[col] + missing_values\n",
-    "        else:\n",
-    "            mv_counts[col] = missing_values\n",
-    "mv_counts"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'member_id': 3,\n",
-       " 'loan_amnt': 3,\n",
-       " 'funded_amnt': 3,\n",
-       " 'funded_amnt_inv': 3,\n",
-       " 'installment': 3,\n",
-       " 'annual_inc': 7,\n",
-       " 'dti': 3,\n",
-       " 'delinq_2yrs': 32,\n",
-       " 'inq_last_6mths': 32,\n",
-       " 'open_acc': 32,\n",
-       " 'pub_rec': 32,\n",
-       " 'revol_bal': 3,\n",
-       " 'revol_util': 93,\n",
-       " 'total_acc': 32,\n",
-       " 'out_prncp': 3,\n",
-       " 'out_prncp_inv': 3,\n",
-       " 'total_pymnt': 3,\n",
-       " 'total_pymnt_inv': 3,\n",
-       " 'total_rec_prncp': 3,\n",
-       " 'total_rec_int': 3,\n",
-       " 'total_rec_late_fee': 3,\n",
-       " 'recoveries': 3,\n",
-       " 'collection_recovery_fee': 3,\n",
-       " 'last_pymnt_amnt': 3,\n",
-       " 'collections_12_mths_ex_med': 148,\n",
-       " 'policy_code': 3,\n",
-       " 'acc_now_delinq': 32,\n",
-       " 'chargeoff_within_12_mths': 148,\n",
-       " 'delinq_amnt': 32,\n",
-       " 'pub_rec_bankruptcies': 1368,\n",
-       " 'tax_liens': 108,\n",
-       " 'term': 3}"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "chunk_iter = pd.read_csv('loans_2007.csv', chunksize=3000, dtype=convert_col_dtypes, parse_dates=[\"issue_d\", \"earliest_cr_line\", \"last_pymnt_d\", \"last_credit_pull_d\"])\n",
-    "mv_counts = {}\n",
-    "for chunk in chunk_iter:\n",
-    "    term_cleaned = chunk['term'].str.lstrip(\" \").str.rstrip(\" months\")\n",
-    "    revol_cleaned = chunk['revol_util'].str.rstrip(\"%\")\n",
-    "    chunk['term'] = pd.to_numeric(term_cleaned)\n",
-    "    chunk['revol_util'] = pd.to_numeric(revol_cleaned)\n",
-    "    chunk = chunk.dropna(how='all')\n",
-    "    float_cols = chunk.select_dtypes(include=['float'])\n",
-    "    for col in float_cols.columns:\n",
-    "        missing_values = len(chunk) - chunk[col].count()\n",
-    "        if col in mv_counts:\n",
-    "            mv_counts[col] = mv_counts[col] + missing_values\n",
-    "        else:\n",
-    "            mv_counts[col] = missing_values\n",
-    "mv_counts"
-   ]
-  }
- ],
- "metadata": {
-  "anaconda-cloud": {},
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 698
Mission167Solutions.ipynb

@@ -1,698 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Introduction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "pd.options.display.max_columns = 99\n",
-    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Compute each column's missing value counts"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "company_country_code          1\n",
-       "company_name                  1\n",
-       "company_permalink             1\n",
-       "company_region                1\n",
-       "investor_region               2\n",
-       "investor_permalink            2\n",
-       "investor_name                 2\n",
-       "funded_quarter                3\n",
-       "funded_at                     3\n",
-       "funded_month                  3\n",
-       "funded_year                   3\n",
-       "funding_round_type            3\n",
-       "company_state_code          492\n",
-       "company_city                533\n",
-       "company_category_code       643\n",
-       "raised_amount_usd          3599\n",
-       "investor_country_code     12001\n",
-       "investor_city             12480\n",
-       "investor_state_code       16809\n",
-       "investor_category_code    50427\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mv_list = []\n",
-    "for chunk in chunk_iter:\n",
-    "    mv_list.append(chunk.isnull().sum())\n",
-    "    \n",
-    "combined_mv_vc = pd.concat(mv_list)\n",
-    "unique_combined_mv_vc = combined_mv_vc.groupby(combined_mv_vc.index).sum()\n",
-    "unique_combined_mv_vc.sort_values()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Total memory footprint for each column"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "company_permalink         4057788\n",
-       "company_name              3591326\n",
-       "company_category_code     3421104\n",
-       "company_country_code      3172176\n",
-       "company_state_code        3106051\n",
-       "company_region            3411585\n",
-       "company_city              3505926\n",
-       "investor_permalink        4980548\n",
-       "investor_name             3915666\n",
-       "investor_category_code     622424\n",
-       "investor_country_code     2647292\n",
-       "investor_state_code       2476607\n",
-       "investor_region           3396281\n",
-       "investor_city             2885083\n",
-       "funding_round_type        3410707\n",
-       "funded_at                 3542185\n",
-       "funded_month              3383584\n",
-       "funded_quarter            3383584\n",
-       "funded_year                422960\n",
-       "raised_amount_usd          422960\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1')\n",
-    "counter = 0\n",
-    "series_memory_fp = pd.Series(dtype='float64')\n",
-    "for chunk in chunk_iter:\n",
-    "    if counter == 0:\n",
-    "        series_memory_fp = chunk.memory_usage(deep=True)\n",
-    "    else:\n",
-    "        series_memory_fp += chunk.memory_usage(deep=True)\n",
-    "    counter += 1\n",
-    "\n",
-    "# Drop memory footprint calculation for the index.\n",
-    "series_memory_fp = series_memory_fp.drop('Index')\n",
-    "series_memory_fp"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Total memory footprint of the data (in megabytes)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "56.9876070022583"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "series_memory_fp.sum() / (1024 * 1024)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "company_country_code          1\n",
-       "company_name                  1\n",
-       "company_permalink             1\n",
-       "company_region                1\n",
-       "investor_region               2\n",
-       "investor_permalink            2\n",
-       "investor_name                 2\n",
-       "funded_quarter                3\n",
-       "funded_at                     3\n",
-       "funded_month                  3\n",
-       "funded_year                   3\n",
-       "funding_round_type            3\n",
-       "company_state_code          492\n",
-       "company_city                533\n",
-       "company_category_code       643\n",
-       "raised_amount_usd          3599\n",
-       "investor_country_code     12001\n",
-       "investor_city             12480\n",
-       "investor_state_code       16809\n",
-       "investor_category_code    50427\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "unique_combined_mv_vc.sort_values()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Drop columns representing URLs or containing too many missing values (>90% missing)\n",
-    "drop_cols = ['investor_permalink', 'company_permalink', 'investor_category_code']\n",
-    "keep_cols = chunk.columns.drop(drop_cols)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<bound method IndexOpsMixin.tolist of Index(['company_name', 'company_category_code', 'company_country_code',\n",
-       "       'company_state_code', 'company_region', 'company_city', 'investor_name',\n",
-       "       'investor_country_code', 'investor_state_code', 'investor_region',\n",
-       "       'investor_city', 'funding_round_type', 'funded_at', 'funded_month',\n",
-       "       'funded_quarter', 'funded_year', 'raised_amount_usd'],\n",
-       "      dtype='object')>"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "keep_cols.tolist"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Selecting Data Types\n",
-    "\n",
-    "Let's first determine which columns shift types across chunks. Note that we only lay the groundwork for this step."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "# Key: Column name, Value: List of types\n",
-    "col_types = {}\n",
-    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1', usecols=keep_cols)\n",
-    "\n",
-    "for chunk in chunk_iter:\n",
-    "    for col in chunk.columns:\n",
-    "        if col not in col_types:\n",
-    "            col_types[col] = [str(chunk.dtypes[col])]\n",
-    "        else:\n",
-    "            col_types[col].append(str(chunk.dtypes[col]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'company_name': {'object'},\n",
-       " 'company_category_code': {'object'},\n",
-       " 'company_country_code': {'object'},\n",
-       " 'company_state_code': {'object'},\n",
-       " 'company_region': {'object'},\n",
-       " 'company_city': {'object'},\n",
-       " 'investor_name': {'object'},\n",
-       " 'investor_country_code': {'float64', 'object'},\n",
-       " 'investor_state_code': {'float64', 'object'},\n",
-       " 'investor_region': {'object'},\n",
-       " 'investor_city': {'float64', 'object'},\n",
-       " 'funding_round_type': {'object'},\n",
-       " 'funded_at': {'object'},\n",
-       " 'funded_month': {'object'},\n",
-       " 'funded_quarter': {'object'},\n",
-       " 'funded_year': {'float64', 'int64'},\n",
-       " 'raised_amount_usd': {'float64'}}"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "uniq_col_types = {}\n",
-    "for k,v in col_types.items():\n",
-    "    uniq_col_types[k] = set(col_types[k])\n",
-    "uniq_col_types"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>company_name</th>\n",
-       "      <th>company_category_code</th>\n",
-       "      <th>company_country_code</th>\n",
-       "      <th>company_state_code</th>\n",
-       "      <th>company_region</th>\n",
-       "      <th>company_city</th>\n",
-       "      <th>investor_name</th>\n",
-       "      <th>investor_country_code</th>\n",
-       "      <th>investor_state_code</th>\n",
-       "      <th>investor_region</th>\n",
-       "      <th>investor_city</th>\n",
-       "      <th>funding_round_type</th>\n",
-       "      <th>funded_at</th>\n",
-       "      <th>funded_month</th>\n",
-       "      <th>funded_quarter</th>\n",
-       "      <th>funded_year</th>\n",
-       "      <th>raised_amount_usd</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>50000</th>\n",
-       "      <td>NuORDER</td>\n",
-       "      <td>fashion</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>CA</td>\n",
-       "      <td>Los Angeles</td>\n",
-       "      <td>West Hollywood</td>\n",
-       "      <td>Mortimer Singer</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>series-a</td>\n",
-       "      <td>2012-10-01</td>\n",
-       "      <td>2012-10</td>\n",
-       "      <td>2012-Q4</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>3060000.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>50001</th>\n",
-       "      <td>ChaCha</td>\n",
-       "      <td>advertising</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>IN</td>\n",
-       "      <td>Indianapolis</td>\n",
-       "      <td>Carmel</td>\n",
-       "      <td>Morton Meyerson</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>series-b</td>\n",
-       "      <td>2007-10-01</td>\n",
-       "      <td>2007-10</td>\n",
-       "      <td>2007-Q4</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>12000000.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>50002</th>\n",
-       "      <td>Binfire</td>\n",
-       "      <td>software</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>FL</td>\n",
-       "      <td>Bocat Raton</td>\n",
-       "      <td>Bocat Raton</td>\n",
-       "      <td>Moshe Ariel</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>angel</td>\n",
-       "      <td>2008-04-18</td>\n",
-       "      <td>2008-04</td>\n",
-       "      <td>2008-Q2</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>500000.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>50003</th>\n",
-       "      <td>Binfire</td>\n",
-       "      <td>software</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>FL</td>\n",
-       "      <td>Bocat Raton</td>\n",
-       "      <td>Bocat Raton</td>\n",
-       "      <td>Moshe Ariel</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>angel</td>\n",
-       "      <td>2010-01-01</td>\n",
-       "      <td>2010-01</td>\n",
-       "      <td>2010-Q1</td>\n",
-       "      <td>2010</td>\n",
-       "      <td>750000.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>50004</th>\n",
-       "      <td>Unified Color</td>\n",
-       "      <td>software</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>CA</td>\n",
-       "      <td>SF Bay</td>\n",
-       "      <td>South San Frnacisco</td>\n",
-       "      <td>Mr. Andrew Oung</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>angel</td>\n",
-       "      <td>2010-01-01</td>\n",
-       "      <td>2010-01</td>\n",
-       "      <td>2010-Q1</td>\n",
-       "      <td>2010</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>52865</th>\n",
-       "      <td>Garantia Data</td>\n",
-       "      <td>enterprise</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>CA</td>\n",
-       "      <td>SF Bay</td>\n",
-       "      <td>Santa Clara</td>\n",
-       "      <td>Zohar Gilon</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>series-a</td>\n",
-       "      <td>2012-08-08</td>\n",
-       "      <td>2012-08</td>\n",
-       "      <td>2012-Q3</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>3800000.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>52866</th>\n",
-       "      <td>DudaMobile</td>\n",
-       "      <td>mobile</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>CA</td>\n",
-       "      <td>SF Bay</td>\n",
-       "      <td>Palo Alto</td>\n",
-       "      <td>Zohar Gilon</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>series-c+</td>\n",
-       "      <td>2013-04-08</td>\n",
-       "      <td>2013-04</td>\n",
-       "      <td>2013-Q2</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>10300000.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>52867</th>\n",
-       "      <td>SiteBrains</td>\n",
-       "      <td>software</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>CA</td>\n",
-       "      <td>SF Bay</td>\n",
-       "      <td>San Francisco</td>\n",
-       "      <td>zohar israel</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>angel</td>\n",
-       "      <td>2010-08-01</td>\n",
-       "      <td>2010-08</td>\n",
-       "      <td>2010-Q3</td>\n",
-       "      <td>2010</td>\n",
-       "      <td>350000.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>52868</th>\n",
-       "      <td>Comprehend Systems</td>\n",
-       "      <td>enterprise</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>CA</td>\n",
-       "      <td>SF Bay</td>\n",
-       "      <td>Palo Alto</td>\n",
-       "      <td>Zorba Lieberman</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>series-a</td>\n",
-       "      <td>2013-07-11</td>\n",
-       "      <td>2013-07</td>\n",
-       "      <td>2013-Q3</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>8400000.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>52869</th>\n",
-       "      <td>SmartThings</td>\n",
-       "      <td>mobile</td>\n",
-       "      <td>USA</td>\n",
-       "      <td>DC</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>Minneapolis</td>\n",
-       "      <td>Zorik Gordon</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>unknown</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>series-a</td>\n",
-       "      <td>2012-12-04</td>\n",
-       "      <td>2012-12</td>\n",
-       "      <td>2012-Q4</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>3000000.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>2870 rows × 17 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "             company_name company_category_code company_country_code  \\\n",
-       "50000             NuORDER               fashion                  USA   \n",
-       "50001              ChaCha           advertising                  USA   \n",
-       "50002             Binfire              software                  USA   \n",
-       "50003             Binfire              software                  USA   \n",
-       "50004       Unified Color              software                  USA   \n",
-       "...                   ...                   ...                  ...   \n",
-       "52865       Garantia Data            enterprise                  USA   \n",
-       "52866          DudaMobile                mobile                  USA   \n",
-       "52867          SiteBrains              software                  USA   \n",
-       "52868  Comprehend Systems            enterprise                  USA   \n",
-       "52869         SmartThings                mobile                  USA   \n",
-       "\n",
-       "      company_state_code company_region         company_city    investor_name  \\\n",
-       "50000                 CA    Los Angeles       West Hollywood  Mortimer Singer   \n",
-       "50001                 IN   Indianapolis               Carmel  Morton Meyerson   \n",
-       "50002                 FL    Bocat Raton          Bocat Raton      Moshe Ariel   \n",
-       "50003                 FL    Bocat Raton          Bocat Raton      Moshe Ariel   \n",
-       "50004                 CA         SF Bay  South San Frnacisco  Mr. Andrew Oung   \n",
-       "...                  ...            ...                  ...              ...   \n",
-       "52865                 CA         SF Bay          Santa Clara      Zohar Gilon   \n",
-       "52866                 CA         SF Bay            Palo Alto      Zohar Gilon   \n",
-       "52867                 CA         SF Bay        San Francisco     zohar israel   \n",
-       "52868                 CA         SF Bay            Palo Alto  Zorba Lieberman   \n",
-       "52869                 DC        unknown          Minneapolis     Zorik Gordon   \n",
-       "\n",
-       "       investor_country_code  investor_state_code investor_region  \\\n",
-       "50000                    NaN                  NaN         unknown   \n",
-       "50001                    NaN                  NaN         unknown   \n",
-       "50002                    NaN                  NaN         unknown   \n",
-       "50003                    NaN                  NaN         unknown   \n",
-       "50004                    NaN                  NaN         unknown   \n",
-       "...                      ...                  ...             ...   \n",
-       "52865                    NaN                  NaN         unknown   \n",
-       "52866                    NaN                  NaN         unknown   \n",
-       "52867                    NaN                  NaN         unknown   \n",
-       "52868                    NaN                  NaN         unknown   \n",
-       "52869                    NaN                  NaN         unknown   \n",
-       "\n",
-       "       investor_city funding_round_type   funded_at funded_month  \\\n",
-       "50000            NaN           series-a  2012-10-01      2012-10   \n",
-       "50001            NaN           series-b  2007-10-01      2007-10   \n",
-       "50002            NaN              angel  2008-04-18      2008-04   \n",
-       "50003            NaN              angel  2010-01-01      2010-01   \n",
-       "50004            NaN              angel  2010-01-01      2010-01   \n",
-       "...              ...                ...         ...          ...   \n",
-       "52865            NaN           series-a  2012-08-08      2012-08   \n",
-       "52866            NaN          series-c+  2013-04-08      2013-04   \n",
-       "52867            NaN              angel  2010-08-01      2010-08   \n",
-       "52868            NaN           series-a  2013-07-11      2013-07   \n",
-       "52869            NaN           series-a  2012-12-04      2012-12   \n",
-       "\n",
-       "      funded_quarter  funded_year  raised_amount_usd  \n",
-       "50000        2012-Q4         2012          3060000.0  \n",
-       "50001        2007-Q4         2007         12000000.0  \n",
-       "50002        2008-Q2         2008           500000.0  \n",
-       "50003        2010-Q1         2010           750000.0  \n",
-       "50004        2010-Q1         2010                NaN  \n",
-       "...              ...          ...                ...  \n",
-       "52865        2012-Q3         2012          3800000.0  \n",
-       "52866        2013-Q2         2013         10300000.0  \n",
-       "52867        2010-Q3         2010           350000.0  \n",
-       "52868        2013-Q3         2013          8400000.0  \n",
-       "52869        2012-Q4         2012          3000000.0  \n",
-       "\n",
-       "[2870 rows x 17 columns]"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "chunk"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Loading Chunks into SQLite"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import sqlite3\n",
-    "conn = sqlite3.connect('crunchbase.db')\n",
-    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1')\n",
-    "\n",
-    "for chunk in chunk_iter:\n",
-    "    chunk.to_sql(\"investments\", conn, if_exists='append', index=False)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

+ 0 - 1006
Mission177Solutions.ipynb

@@ -1,1006 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Stock Price Data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import pandas as pd\n",
-    "\n",
-    "stock_prices = {}\n",
-    "\n",
-    "for fn in os.listdir(\"prices\"):\n",
-    "    # Get the name of the file without extension \"aapl.csv\" -> \"aapl\"\n",
-    "    name = fn.split(\".\")[0]\n",
-    "    stock_prices[name] = pd.read_csv(os.path.join(\"prices\", fn))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We chose a dictionary whose keys are the stock symbols and whose values are the DataFrames loaded from the corresponding CSV files.\n",
-    "\n",
-    "Let's display the data stored for the `aapl` stock symbol:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>date</th>\n",
-       "      <th>close</th>\n",
-       "      <th>open</th>\n",
-       "      <th>high</th>\n",
-       "      <th>low</th>\n",
-       "      <th>volume</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <td>0</td>\n",
-       "      <td>2007-01-03</td>\n",
-       "      <td>83.800002</td>\n",
-       "      <td>86.289999</td>\n",
-       "      <td>86.579999</td>\n",
-       "      <td>81.899999</td>\n",
-       "      <td>309579900</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1</td>\n",
-       "      <td>2007-01-04</td>\n",
-       "      <td>85.659998</td>\n",
-       "      <td>84.050001</td>\n",
-       "      <td>85.949998</td>\n",
-       "      <td>83.820003</td>\n",
-       "      <td>211815100</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>2</td>\n",
-       "      <td>2007-01-05</td>\n",
-       "      <td>85.049997</td>\n",
-       "      <td>85.770000</td>\n",
-       "      <td>86.199997</td>\n",
-       "      <td>84.400002</td>\n",
-       "      <td>208685400</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>3</td>\n",
-       "      <td>2007-01-08</td>\n",
-       "      <td>85.470000</td>\n",
-       "      <td>85.959998</td>\n",
-       "      <td>86.529998</td>\n",
-       "      <td>85.280003</td>\n",
-       "      <td>199276700</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>4</td>\n",
-       "      <td>2007-01-09</td>\n",
-       "      <td>92.570003</td>\n",
-       "      <td>86.450003</td>\n",
-       "      <td>92.979999</td>\n",
-       "      <td>85.150000</td>\n",
-       "      <td>837324600</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "         date      close       open       high        low     volume\n",
-       "0  2007-01-03  83.800002  86.289999  86.579999  81.899999  309579900\n",
-       "1  2007-01-04  85.659998  84.050001  85.949998  83.820003  211815100\n",
-       "2  2007-01-05  85.049997  85.770000  86.199997  84.400002  208685400\n",
-       "3  2007-01-08  85.470000  85.959998  86.529998  85.280003  199276700\n",
-       "4  2007-01-09  92.570003  86.450003  92.979999  85.150000  837324600"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "stock_prices[\"aapl\"].head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Computing Aggregates"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Computing average closing prices "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "avg_closing_prices = {}\n",
-    "\n",
-    "for stock_sym in stock_prices:\n",
-    "    avg_closing_prices[stock_sym] = stock_prices[stock_sym][\"close\"].mean()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Displaying the average closing prices"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "eqix 165.3847721150579\n",
-      "club 7.270509651737427\n",
-      "bmrc 39.35481079459455\n",
-      "cald 8.608965250965264\n",
-      "cybe 9.964861003860992\n",
-      "bbry 43.67659082355207\n",
-      "chscp 29.07304635598456\n",
-      "essa 12.126070440047481\n",
-      "cprx 1.976200772200771\n",
-      "arrs 17.10461388532818\n",
-      "ctic 1.4943663119691135\n",
-      "adrd 22.51748262046331\n",
-      "arna 4.915745173745166\n",
-      "ffic 16.593648647876414\n",
-      "ca 25.746281860231644\n",
-      "alot 10.28669884208494\n",
-      "csfl 11.947644780694985\n",
-      "cern 65.04237453166031\n",
-      "fhco 4.28845945945947\n",
-      "dvax 6.0337528984555995\n",
-      "exel 6.616277998455593\n",
-      "abcb 17.990475994208477\n",
-      "alog 64.74335521467185\n",
-      "bncn 13.986131252895746\n",
-      "eltk 1.5323436293436348\n",
-      "fbiz 22.95887644826253\n",
-      "brks 10.52473359227799\n",
-      "cunb 15.99822393513515\n",
-      "clrb 1.204571143629345\n",
-      "agen 2.9998899559845587\n",
-      "amzn 275.1340775710431\n",
-      "eqfn 5.558436266023189\n",
-      "evep 31.358648642471\n",
-      "bnso 1.717254826254819\n",
-      "asys 8.914054046332067\n",
-      "fisi 19.938084950965262\n",
-      "cbio 8.433602686100393\n",
-      "flic 27.73225096177597\n",
-      "bmrn 50.521710407335874\n",
-      "bcbp 11.546521235135131\n",
-      "aezs 1.739144594980703\n",
-      "cmls 3.678938223938218\n",
-      "apwc 3.2336409266409234\n",
-      "cnit 3.6047451737451803\n",
-      "arkr 20.460409264092682\n",
-      "dave 12.284664105405401\n",
-      "ctas 50.47888414247106\n",
-      "cldx 9.006351276061771\n",
-      "apog 26.00773359150577\n",
-      "cbak 2.437895752895755\n",
-      "efii 25.840223945945958\n",
-      "crws 5.629305019305021\n",
-      "finl 17.241752891505783\n",
-      "abco 47.647057967567655\n",
-      "emkr 4.458320463320471\n",
-      "boch 6.063119691119678\n",
-      "ffhl 2.192687258687256\n",
-      "cbrl 76.63736287992297\n",
-      "botj 9.858123591891871\n",
-      "fcnca 200.25248278146725\n",
-      "aame 2.7796795366795344\n",
-      "achc 24.047795338223956\n",
-      "cake 34.31267570270274\n",
-      "ccbg 16.266409268725862\n",
-      "fmbi 17.35091119613901\n",
-      "cass 42.14163373474898\n",
-      "arlz 7.441718146718154\n",
-      "elgx 8.97644016370655\n",
-      "atrs 2.0350231660231692\n",
-      "dswl 3.529177606177609\n",
-      "csbr 1.228244384585441\n",
-      "adru 22.371667961776062\n",
-      "cetv 24.057965252509586\n",
-      "astc 1.4152123552123521\n",
-      "arry 5.1828996138996075\n",
-      "ewbc 26.819362960231615\n",
-      "atro 31.862567476834023\n",
-      "anik 20.774474920463295\n",
-      "agys 10.303613901544395\n",
-      "ffiv 86.294579173745\n",
-      "dxyn 6.331316601930499\n",
-      "cdor 2.605772193822393\n",
-      "avdl 10.103034740154433\n",
-      "arwr 4.130016216216213\n",
-      "fbss 15.228308892278005\n",
-      "clmt 23.327073368339757\n",
-      "afsi 26.699826589189257\n",
-      "cmtl 30.96300771621617\n",
-      "cmcsa 35.90450579227791\n",
-      "denn 5.761945945945953\n",
-      "ccmp 39.67996139150581\n",
-      "depo 8.988274123938211\n",
-      "drys 13.498539550193074\n",
-      "cur 1.907691699604743\n",
-      "cban 8.23264092277992\n",
-      "emitf 12.964027813127366\n",
-      "dwch 8.038034755212372\n",
-      "cytr 1.9986748837837829\n",
-      "cswc 77.75590740695002\n",
-      "ctrn 20.54685713976835\n",
-      "efsc 18.541934354826246\n",
-      "cinf 42.25041697451733\n",
-      "dmrc 22.26364027351096\n",
-      "cbfv 18.72878765019304\n",
-      "cyrn 2.7131410714285673\n",
-      "esca 8.18794594324325\n",
-      "ffkt 19.472922793050166\n",
-      "chfn 15.708602240154498\n",
-      "bldp 2.3273861003861005\n",
-      "cent 11.43199613474903\n",
-      "ceva 17.124220071042476\n",
-      "dakt 12.215868713513515\n",
-      "crnt 6.269598454826258\n",
-      "axdx 8.656428568339782\n",
-      "cntf 2.6595637119691156\n",
-      "exas 9.390011581081083\n",
-      "admp 1.7122164397683428\n",
-      "bwen 5.326498072200769\n",
-      "cytx 3.3293219922779875\n",
-      "cece 9.062675674517372\n",
-      "conn 21.148482605791525\n",
-      "arci 3.1327799227799207\n",
-      "bbgi 5.33829729729731\n",
-      "expo 46.09936678262553\n",
-      "cgnx 32.55762165714287\n",
-      "cwst 6.658471042471043\n",
-      "fccy 10.67951246486487\n",
-      "chci 1.4581224154440184\n",
-      "cplp 9.927482215019769\n",
-      "eeft 35.11525484749034\n",
-      "cpsi 44.44345173899618\n",
-      "aaww 44.331602290347405\n",
-      "adra 27.3514517397683\n",
-      "belfa 21.04013901081089\n",
-      "bdge 24.120351324324314\n",
-      "arii 31.491413133590704\n",
-      "aiq 10.171544398841688\n",
-      "esnd 40.79829342664082\n",
-      "acta 11.32055983706564\n",
-      "allt 9.18001930270271\n",
-      "crtn 2.1850579150579117\n",
-      "cmco 20.10901159999999\n",
-      "expe 53.78315830308872\n",
-      "asur 3.8731236637065614\n",
-      "adre 39.14505407104248\n",
-      "aaxn 11.863907341698843\n",
-      "dltr 57.418077247490366\n",
-      "dmlp 22.283281861003864\n",
-      "byfc 3.4977644787644735\n",
-      "eslt 58.57627412471036\n",
-      "dwsn 6.194910959459459\n",
-      "acet 12.655212363320476\n",
-      "dest 18.788065616216212\n",
-      "bgfv 13.15647104671812\n",
-      "cemi 2.4821776061776015\n",
-      "amsf 30.34488032162161\n",
-      "edap 3.235803088803086\n",
-      "bbby 50.18332436486479\n",
-      "cfnl 14.268891889189167\n",
-      "crvl 37.443864903474925\n",
-      "etfc 14.956660266795353\n",
-      "cort 3.299548262548255\n",
-      "airt 12.430108102316591\n",
-      "cfbk 2.1748416988416936\n",
-      "cvcy 9.671478766409258\n",
-      "camp 9.500046333590733\n",
-      "cohr 53.71215058262553\n",
-      "banr 26.60423480193051\n",
-      "bldr 6.945467184942081\n",
-      "dvcr 7.688459461389958\n",
-      "aapl 257.17654040231656\n",
-      "akrx 15.387104233590746\n",
-      "clct 14.436679601158307\n",
-      "ccoi 23.236517377606194\n",
-      "bdsi 4.820706564478758\n",
-      "discb 39.652757378378595\n",
-      "dcom 14.7273552096525\n",
-      "eei 12.263416957915059\n",
-      "avhi 22.406231679150594\n",
-      "ahgp 38.20530885868731\n",
-      "bcrx 6.095837838223926\n",
-      "banf 49.6434980416988\n",
-      "buse 10.92032454362932\n",
-      "cgnt 1.5946138996139008\n",
-      "atlo 22.101030884556003\n",
-      "alsk 5.995567569498072\n",
-      "blfs 0.8122763011583004\n",
-      "arql 3.874424710424698\n",
-      "anat 97.93825093397685\n",
-      "cray 15.423347486486477\n",
-      "capr 2.4732474629196006\n",
-      "cobz 10.071579151737454\n",
-      "atni 47.67885716216225\n",
-      "drrx 2.3527799227799227\n",
-      "cbsh 42.69090342934362\n",
-      "amtd 23.49051739768336\n",
-      "ande 41.829026980308846\n",
-      "bybk 6.642911204633245\n",
-      "ebix 32.53216976293444\n",
-      "fbms 14.22483009266407\n",
-      "cenx 18.395567551737482\n",
-      "amswa 8.076181467181465\n",
-      "cyccp 4.965254826254832\n",
-      "crds 1.8903166015444017\n",
-      "cash 32.26195366332041\n",
-      "algt 83.70168345444011\n",
-      "acxm 18.26306178378379\n",
-      "bwld 89.39383399150582\n",
-      "emcf 22.21991505482626\n",
-      "dysl 1.8631660231660265\n",
-      "axas 2.836629343629344\n",
-      "adbe 51.19943628416986\n",
-      "ffin 42.17889953474895\n",
-      "asfi 11.159220083783804\n",
-      "chke 19.15281466100384\n",
-      "biib 164.53822006139012\n",
-      "ainv 9.949749044015475\n",
-      "evbs 8.454656358687263\n",
-      "falc 3.609212355212365\n",
-      "call 10.101200768339778\n",
-      "caas 7.6334401583011715\n",
-      "educ 5.948108107721997\n",
-      "asna 17.81176063204633\n",
-      "eght 5.531308880694991\n",
-      "amnb 22.11375289034745\n",
-      "cffn 21.416077199999965\n",
-      "cgo 13.773633200000043\n",
-      "centa 10.959813017140611\n",
-      "banfp 26.415837825482622\n",
-      "dtrm 3.723555984555983\n",
-      "entg 9.497733591505801\n",
-      "bmra 0.901011583011584\n",
-      "cvly 15.41210743397683\n",
-      "bbh 113.28309655096503\n",
-      "fast 44.40756368957524\n",
-      "epay 20.796501912355225\n",
-      "acgl 63.325907376833804\n",
-      "aste 37.29283010849429\n",
-      "dxtr 2.749339768339764\n",
-      "ea 37.33655212046332\n",
-      "alny 39.17148648803088\n",
-      "endp 37.3472664173746\n",
-      "csq 10.269231660231666\n",
-      "exls 26.462393820849456\n",
-      "artx 2.0992316602316587\n",
-      "ebtc 17.20153669227798\n",
-      "bbsi 30.42552507722014\n",
-      "cme 230.29466011003882\n",
-      "alqa 1.4052982830115854\n",
-      "cytk 4.742564193822396\n",
-      "csco 23.628822402702724\n",
-      "fisv 67.52742853513507\n",
-      "eric 13.297131263706602\n",
-      "arlp 46.94279149343634\n",
-      "coke 80.56527417181458\n",
-      "cacb 7.0127567953667915\n",
-      "colb 23.599138993050214\n",
-      "clwt 2.61713222471043\n",
-      "cffi 32.43315446061778\n",
-      "dspg 9.215841698069477\n",
-      "farm 20.19316601351357\n",
-      "abio 2.2518008000000007\n",
-      "evlv 3.9725907335907293\n",
-      "cznc 18.655586874131235\n",
-      "amat 17.116648652509628\n",
-      "algn 36.75162934864863\n",
-      "cvti 10.437138995752889\n",
-      "acor 27.47286873938217\n",