Queer European MD passionate about IT
Quellcode durchsuchen

Merge branch 'master' of github.com:dataquestio/solutions

Christian Pascual vor 4 Jahren
Ursprung
Commit
64a45b1d49
5 geänderte Dateien mit 142 neuen und 469 gelöschten Zeilen
  1. 10 6
      Mission146Solutions.ipynb
  2. 115 413
      Mission155Solutions.ipynb
  3. 4 3
      Mission210Solution.ipynb
  4. 7 2
      Mission327Solutions.Rmd
  5. 6 45
      Mission469Solutions.ipynb

Datei-Diff unterdrückt, da er zu groß ist
+ 10 - 6
Mission146Solutions.ipynb


Datei-Diff unterdrückt, da er zu groß ist
+ 115 - 413
Mission155Solutions.ipynb


+ 4 - 3
Mission210Solution.ipynb

@@ -250,6 +250,7 @@
     "def normalize_text(text):\n",
     "    text = text.lower()\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
+    "    text = re.sub(\"\\s+\", \" \", text)\n",
     "    return text\n",
     "\n",
     "def normalize_values(text):\n",
@@ -570,8 +571,8 @@
    "outputs": [],
    "source": [
     "def count_matches(row):\n",
-    "    split_answer = row[\"clean_answer\"].split(\" \")\n",
-    "    split_question = row[\"clean_question\"].split(\" \")\n",
+    "    split_answer = row[\"clean_answer\"].split()\n",
+    "    split_question = row[\"clean_question\"].split()\n",
     "    if \"the\" in split_answer:\n",
     "        split_answer.remove(\"the\")\n",
     "    if len(split_answer) == 0:\n",
@@ -593,7 +594,7 @@
     {
      "data": {
       "text/plain": [
-       "0.060493257069335914"
+       "0.059001965249777744"
       ]
      },
      "execution_count": 10,

+ 7 - 2
Mission327Solutions.Rmd

@@ -100,8 +100,13 @@ map2(x_var, y_var, create_scatter)
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 
 ```{r}
-combined_survey_gather <- combined_survey %>%                         
-  gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+# combined_survey_gather <- combined_survey %>%
+#   gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+
+combined_survey_gather <- combined_survey %>%
+  pivot_longer(cols = saf_p_11:aca_tot_11,
+               names_to = "survey_question",
+               values_to = "score")
 ```
 
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.

+ 6 - 45
Mission469Solutions.ipynb

@@ -835,46 +835,7 @@
    "source": [
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "\n",
-    "We'll use Python's builtin [`enumerate()`](https://docs.python.org/3/library/functions.html#enumerate) function. Its utility is well understood by seeing it action."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 I\n",
-      "1 t\n",
-      "2 e\n",
-      "3 r\n",
-      "4 a\n",
-      "5 t\n",
-      "6 e\n",
-      "7  \n",
-      "8 t\n",
-      "9 h\n",
-      "10 i\n",
-      "11 s\n",
-      "12 !\n"
-     ]
-    }
-   ],
-   "source": [
-    "some_iterable = \"Iterate this!\"\n",
-    "\n",
-    "for i,c in enumerate(some_iterable):\n",
-    "    print(i,c)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In addition to the elements of `some_iterable`, `enumerate` gives us the index of each of them."
+    "We'll use _pandas_'s [`pandas.DataFrame.iterrows()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows). "
    ]
   },
   {
@@ -908,12 +869,12 @@
    "source": [
     "tag_view_count = dict()\n",
     "\n",
-    "for idx, tags in enumerate(questions[\"Tags\"]):\n",
-    "    for tag in tags:\n",
+    "for index, row in questions.iterrows():\n",
+    "    for tag in row['Tags']:\n",
     "        if tag in tag_view_count:\n",
-    "            tag_view_count[tag] += questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] += row['ViewCount']\n",
     "        else:\n",
-    "            tag_view_count[tag] = questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] = row['ViewCount']\n",
     "            \n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
@@ -2224,7 +2185,7 @@
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "\n",
-    "for idx, t in enumerate(quarterly[\"TotalQuestions\"]):\n",
+    "for idx, t in quarterly[\"TotalQuestions\"].iteritems():\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "xlims = ax1.get_xlim()\n",
     "\n",

Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.