Queer European MD passionate about IT
Browse Source

Merge branch 'master' of github.com:dataquestio/solutions

Christian Pascual 4 years ago
parent
commit
64a45b1d49
5 changed files with 142 additions and 469 deletions
  1. 10 6
      Mission146Solutions.ipynb
  2. 115 413
      Mission155Solutions.ipynb
  3. 4 3
      Mission210Solution.ipynb
  4. 7 2
      Mission327Solutions.Rmd
  5. 6 45
      Mission469Solutions.ipynb

File diff suppressed because it is too large
+ 10 - 6
Mission146Solutions.ipynb


File diff suppressed because it is too large
+ 115 - 413
Mission155Solutions.ipynb


+ 4 - 3
Mission210Solution.ipynb

@@ -250,6 +250,7 @@
     "def normalize_text(text):\n",
     "def normalize_text(text):\n",
     "    text = text.lower()\n",
     "    text = text.lower()\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
+    "    text = re.sub(\"\\s+\", \" \", text)\n",
     "    return text\n",
     "    return text\n",
     "\n",
     "\n",
     "def normalize_values(text):\n",
     "def normalize_values(text):\n",
@@ -570,8 +571,8 @@
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
     "def count_matches(row):\n",
     "def count_matches(row):\n",
-    "    split_answer = row[\"clean_answer\"].split(\" \")\n",
-    "    split_question = row[\"clean_question\"].split(\" \")\n",
+    "    split_answer = row[\"clean_answer\"].split()\n",
+    "    split_question = row[\"clean_question\"].split()\n",
     "    if \"the\" in split_answer:\n",
     "    if \"the\" in split_answer:\n",
     "        split_answer.remove(\"the\")\n",
     "        split_answer.remove(\"the\")\n",
     "    if len(split_answer) == 0:\n",
     "    if len(split_answer) == 0:\n",
@@ -593,7 +594,7 @@
     {
     {
      "data": {
      "data": {
       "text/plain": [
       "text/plain": [
-       "0.060493257069335914"
+       "0.059001965249777744"
       ]
       ]
      },
      },
      "execution_count": 10,
      "execution_count": 10,

+ 7 - 2
Mission327Solutions.Rmd

@@ -100,8 +100,13 @@ map2(x_var, y_var, create_scatter)
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 
 
 ```{r}
 ```{r}
-combined_survey_gather <- combined_survey %>%                         
-  gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+# combined_survey_gather <- combined_survey %>%
+#   gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+
+combined_survey_gather <- combined_survey %>%
+  pivot_longer(cols = saf_p_11:aca_tot_11,
+               names_to = "survey_question",
+               values_to = "score")
 ```
 ```
 
 
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.

+ 6 - 45
Mission469Solutions.ipynb

@@ -835,46 +835,7 @@
    "source": [
    "source": [
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "\n",
     "\n",
-    "We'll use Python's built-in [`enumerate()`](https://docs.python.org/3/library/functions.html#enumerate) function. Its utility is best understood by seeing it in action."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 I\n",
-      "1 t\n",
-      "2 e\n",
-      "3 r\n",
-      "4 a\n",
-      "5 t\n",
-      "6 e\n",
-      "7  \n",
-      "8 t\n",
-      "9 h\n",
-      "10 i\n",
-      "11 s\n",
-      "12 !\n"
-     ]
-    }
-   ],
-   "source": [
-    "some_iterable = \"Iterate this!\"\n",
-    "\n",
-    "for i,c in enumerate(some_iterable):\n",
-    "    print(i,c)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In addition to the elements of `some_iterable`, `enumerate` gives us the index of each of them."
+    "We'll use the _pandas_ method [`pandas.DataFrame.iterrows()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows), which yields each row together with its index label."
    ]
    ]
   },
   },
   {
   {
@@ -908,12 +869,12 @@
    "source": [
    "source": [
     "tag_view_count = dict()\n",
     "tag_view_count = dict()\n",
     "\n",
     "\n",
-    "for idx, tags in enumerate(questions[\"Tags\"]):\n",
-    "    for tag in tags:\n",
+    "for index, row in questions.iterrows():\n",
+    "    for tag in row['Tags']:\n",
     "        if tag in tag_view_count:\n",
     "        if tag in tag_view_count:\n",
-    "            tag_view_count[tag] += questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] += row['ViewCount']\n",
     "        else:\n",
     "        else:\n",
-    "            tag_view_count[tag] = questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] = row['ViewCount']\n",
     "            \n",
     "            \n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
@@ -2224,7 +2185,7 @@
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "\n",
     "\n",
-    "for idx, t in enumerate(quarterly[\"TotalQuestions\"]):\n",
+    "for idx, t in quarterly[\"TotalQuestions\"].iteritems():\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "xlims = ax1.get_xlim()\n",
     "xlims = ax1.get_xlim()\n",
     "\n",
     "\n",

Some files were not shown because too many files changed in this diff