Queer European MD passionate about IT
Ver código fonte

Use pandas methods

bruno 4 anos atrás
pai
commit
caaf9b2521
1 arquivos alterados com 6 adições e 45 exclusões
  1. 6 45
      Mission469Solutions.ipynb

+ 6 - 45
Mission469Solutions.ipynb

@@ -835,46 +835,7 @@
    "source": [
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "\n",
-    "We'll use Python's built-in [`enumerate()`](https://docs.python.org/3/library/functions.html#enumerate) function. Its utility is best understood by seeing it in action."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 I\n",
-      "1 t\n",
-      "2 e\n",
-      "3 r\n",
-      "4 a\n",
-      "5 t\n",
-      "6 e\n",
-      "7  \n",
-      "8 t\n",
-      "9 h\n",
-      "10 i\n",
-      "11 s\n",
-      "12 !\n"
-     ]
-    }
-   ],
-   "source": [
-    "some_iterable = \"Iterate this!\"\n",
-    "\n",
-    "for i,c in enumerate(some_iterable):\n",
-    "    print(i,c)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In addition to the elements of `some_iterable`, `enumerate` gives us the index of each of them."
+    "We'll use the _pandas_ method [`pandas.DataFrame.iterrows()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows), which yields an `(index, row)` pair for each row of the DataFrame."
    ]
   },
   {
@@ -908,12 +869,12 @@
    "source": [
     "tag_view_count = dict()\n",
     "\n",
-    "for idx, tags in enumerate(questions[\"Tags\"]):\n",
-    "    for tag in tags:\n",
+    "for index, row in questions.iterrows():\n",
+    "    for tag in row['Tags']:\n",
     "        if tag in tag_view_count:\n",
-    "            tag_view_count[tag] += questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] += row['ViewCount']\n",
     "        else:\n",
-    "            tag_view_count[tag] = questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] = row['ViewCount']\n",
     "            \n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
@@ -2224,7 +2185,7 @@
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "\n",
-    "for idx, t in enumerate(quarterly[\"TotalQuestions\"]):\n",
+    "for idx, t in quarterly[\"TotalQuestions\"].iteritems():\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "xlims = ax1.get_xlim()\n",
     "\n",