5 years ago · 64a45b1d49
--- a/Mission146Solutions.ipynb
+++ b/Mission146Solutions.ipynb
--- a/Mission155Solutions.ipynb
+++ b/Mission155Solutions.ipynb
--- a/Mission210Solution.ipynb
+++ b/Mission210Solution.ipynb
@@ -250,6 +250,7 @@
 
															     "def normalize_text(text):\n",
														
 
															     "    text = text.lower()\n",
														
 
															     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
														
 
															+    "    text = re.sub(\"\\s+\", \" \", text)\n",
														
 
															     "    return text\n",
														
 
															     "\n",
														
 
															     "def normalize_values(text):\n",
														
@@ -570,8 +571,8 @@
 
															    "outputs": [],
														
 
															    "source": [
														
 
															     "def count_matches(row):\n",
														
 
															-    "    split_answer = row[\"clean_answer\"].split(\" \")\n",
														
 
															-    "    split_question = row[\"clean_question\"].split(\" \")\n",
														
 
															+    "    split_answer = row[\"clean_answer\"].split()\n",
														
 
															+    "    split_question = row[\"clean_question\"].split()\n",
														
 
															     "    if \"the\" in split_answer:\n",
														
 
															     "        split_answer.remove(\"the\")\n",
														
 
															     "    if len(split_answer) == 0:\n",
														
@@ -593,7 +594,7 @@
 
															     {
														
 
															      "data": {
														
 
															       "text/plain": [
														
 
															-       "0.060493257069335914"
														
 
															+       "0.059001965249777744"
														
 
															       ]
														
 
															      },
														
 
															      "execution_count": 10,
														
--- a/Mission327Solutions.Rmd
+++ b/Mission327Solutions.Rmd
@@ -100,8 +100,13 @@ map2(x_var, y_var, create_scatter)
 
															 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
														
 
															 ```{r}
														
 
															-combined_survey_gather <- combined_survey %>%                         
														
 
															-  gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
														
 
															+# combined_survey_gather <- combined_survey %>%
														
 
															+#   gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
														
 
															+
														
 
															+combined_survey_gather <- combined_survey %>%
														
 
															+  pivot_longer(cols = saf_p_11:aca_tot_11,
														
 
															+               names_to = "survey_question",
														
 
															+               values_to = "score")
														
 
															 ```
														
 
															 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.
														
--- a/Mission469Solutions.ipynb
+++ b/Mission469Solutions.ipynb
@@ -835,46 +835,7 @@
 
															    "source": [
														
 
															     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
														
 
															     "\n",
														
 
															-    "We'll use Python's builtin [`enumerate()`](https://docs.python.org/3/library/functions.html#enumerate) function. Its utility is well understood by seeing it action."
														
 
															-   ]
														
 
															-  },
														
 
															-  {
														
 
															-   "cell_type": "code",
														
 
															-   "execution_count": 11,
														
 
															-   "metadata": {},
														
 
															-   "outputs": [
														
 
															-    {
														
 
															-     "name": "stdout",
														
 
															-     "output_type": "stream",
														
 
															-     "text": [
														
 
															-      "0 I\n",
														
 
															-      "1 t\n",
														
 
															-      "2 e\n",
														
 
															-      "3 r\n",
														
 
															-      "4 a\n",
														
 
															-      "5 t\n",
														
 
															-      "6 e\n",
														
 
															-      "7  \n",
														
 
															-      "8 t\n",
														
 
															-      "9 h\n",
														
 
															-      "10 i\n",
														
 
															-      "11 s\n",
														
 
															-      "12 !\n"
														
 
															-     ]
														
 
															-    }
														
 
															-   ],
														
 
															-   "source": [
														
 
															-    "some_iterable = \"Iterate this!\"\n",
														
 
															-    "\n",
														
 
															-    "for i,c in enumerate(some_iterable):\n",
														
 
															-    "    print(i,c)"
														
 
															-   ]
														
 
															-  },
														
 
															-  {
														
 
															-   "cell_type": "markdown",
														
 
															-   "metadata": {},
														
 
															-   "source": [
														
 
															-    "In addition to the elements of `some_iterable`, `enumerate` gives us the index of each of them."
														
 
															+    "We'll use _pandas_'s [`pandas.DataFrame.iterrows()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows). "
														
 
															    ]
														
 
															   },
														
 
															   {
														
@@ -908,12 +869,12 @@
 
															    "source": [
														
 
															     "tag_view_count = dict()\n",
														
 
															     "\n",
														
 
															-    "for idx, tags in enumerate(questions[\"Tags\"]):\n",
														
 
															-    "    for tag in tags:\n",
														
 
															+    "for index, row in questions.iterrows():\n",
														
 
															+    "    for tag in row['Tags']:\n",
														
 
															     "        if tag in tag_view_count:\n",
														
 
															-    "            tag_view_count[tag] += questions[\"ViewCount\"].iloc[idx]\n",
														
 
															+    "            tag_view_count[tag] += row['ViewCount']\n",
														
 
															     "        else:\n",
														
 
															-    "            tag_view_count[tag] = questions[\"ViewCount\"].iloc[idx]\n",
														
 
															+    "            tag_view_count[tag] = row['ViewCount']\n",
														
 
															     "            \n",
														
 
															     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
														
 
															     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
														
@@ -2224,7 +2185,7 @@
 
															     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
														
 
															     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
														
 
															     "\n",
														
 
															-    "for idx, t in enumerate(quarterly[\"TotalQuestions\"]):\n",
														
 
															+    "for idx, t in quarterly[\"TotalQuestions\"].iteritems():\n",
														
 
															     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
														
 
															     "xlims = ax1.get_xlim()\n",
														
 
															     "\n",