vor 5 Jahren · 64a45b1d49
--- a/Mission146Solutions.ipynb
+++ b/Mission146Solutions.ipynb
--- a/Mission155Solutions.ipynb
+++ b/Mission155Solutions.ipynb
--- a/Mission210Solution.ipynb
+++ b/Mission210Solution.ipynb
@@ -250,6 +250,7 @@
 
				     "def normalize_text(text):\n",
			
 
				     "    text = text.lower()\n",
			
 
				     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
			
 
				+    "    text = re.sub(\"\\s+\", \" \", text)\n",
			
 
				     "    return text\n",
			
 
				     "\n",
			
 
				     "def normalize_values(text):\n",
			
@@ -570,8 +571,8 @@
 
				    "outputs": [],
			
 
				    "source": [
			
 
				     "def count_matches(row):\n",
			
 
				-    "    split_answer = row[\"clean_answer\"].split(\" \")\n",
			
 
				-    "    split_question = row[\"clean_question\"].split(\" \")\n",
			
 
				+    "    split_answer = row[\"clean_answer\"].split()\n",
			
 
				+    "    split_question = row[\"clean_question\"].split()\n",
			
 
				     "    if \"the\" in split_answer:\n",
			
 
				     "        split_answer.remove(\"the\")\n",
			
 
				     "    if len(split_answer) == 0:\n",
			
@@ -593,7 +594,7 @@
 
				     {
			
 
				      "data": {
			
 
				       "text/plain": [
			
 
				-       "0.060493257069335914"
			
 
				+       "0.059001965249777744"
			
 
				       ]
			
 
				      },
			
 
				      "execution_count": 10,
			
--- a/Mission327Solutions.Rmd
+++ b/Mission327Solutions.Rmd
@@ -100,8 +100,13 @@ map2(x_var, y_var, create_scatter)
 
				 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
			
 
				 
			
 
				 ```{r}
			
 
				-combined_survey_gather <- combined_survey %>%                         
			
 
				-  gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
			
 
				+# combined_survey_gather <- combined_survey %>%
			
 
				+#   gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
			
 
				+
			
 
				+combined_survey_gather <- combined_survey %>%
			
 
				+  pivot_longer(cols = saf_p_11:aca_tot_11,
			
 
				+               names_to = "survey_question",
			
 
				+               values_to = "score")
			
 
				 ```
			
 
				 
			
 
				 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.
			
--- a/Mission469Solutions.ipynb
+++ b/Mission469Solutions.ipynb
@@ -835,46 +835,7 @@
 
				    "source": [
			
 
				     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
			
 
				     "\n",
			
 
				-    "We'll use Python's builtin [`enumerate()`](https://docs.python.org/3/library/functions.html#enumerate) function. Its utility is well understood by seeing it action."
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 11,
			
 
				-   "metadata": {},
			
 
				-   "outputs": [
			
 
				-    {
			
 
				-     "name": "stdout",
			
 
				-     "output_type": "stream",
			
 
				-     "text": [
			
 
				-      "0 I\n",
			
 
				-      "1 t\n",
			
 
				-      "2 e\n",
			
 
				-      "3 r\n",
			
 
				-      "4 a\n",
			
 
				-      "5 t\n",
			
 
				-      "6 e\n",
			
 
				-      "7  \n",
			
 
				-      "8 t\n",
			
 
				-      "9 h\n",
			
 
				-      "10 i\n",
			
 
				-      "11 s\n",
			
 
				-      "12 !\n"
			
 
				-     ]
			
 
				-    }
			
 
				-   ],
			
 
				-   "source": [
			
 
				-    "some_iterable = \"Iterate this!\"\n",
			
 
				-    "\n",
			
 
				-    "for i,c in enumerate(some_iterable):\n",
			
 
				-    "    print(i,c)"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "In addition to the elements of `some_iterable`, `enumerate` gives us the index of each of them."
			
 
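				+    "We'll use _pandas_'s [`pandas.DataFrame.iterrows()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows), which yields each row of the dataframe together with its index label, so we can read a question's `Tags` and `ViewCount` from the same row."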
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -908,12 +869,12 @@
 
				    "source": [
			
 
				     "tag_view_count = dict()\n",
			
 
				     "\n",
			
 
				-    "for idx, tags in enumerate(questions[\"Tags\"]):\n",
			
 
				-    "    for tag in tags:\n",
			
 
				+    "for index, row in questions.iterrows():\n",
			
 
				+    "    for tag in row['Tags']:\n",
			
 
				     "        if tag in tag_view_count:\n",
			
 
				-    "            tag_view_count[tag] += questions[\"ViewCount\"].iloc[idx]\n",
			
 
				+    "            tag_view_count[tag] += row['ViewCount']\n",
			
 
				     "        else:\n",
			
 
				-    "            tag_view_count[tag] = questions[\"ViewCount\"].iloc[idx]\n",
			
 
				+    "            tag_view_count[tag] = row['ViewCount']\n",
			
 
				     "            \n",
			
 
				     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
			
 
				     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
			
@@ -2224,7 +2185,7 @@
 
				     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
			
 
				     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
			
 
				     "\n",
			
 
				-    "for idx, t in enumerate(quarterly[\"TotalQuestions\"]):\n",
			
 
				+    "for idx, t in quarterly[\"TotalQuestions\"].iteritems():\n",
			
 
				     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
			
 
				     "xlims = ax1.get_xlim()\n",
			
 
				     "\n",