Queer European MD passionate about IT
Эх сурвалжийг харах

Merge branch 'master' of github.com:dataquestio/solutions

Christian Pascual 5 жил өмнө
parent
commit
64a45b1d49

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 10 - 6
Mission146Solutions.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 115 - 413
Mission155Solutions.ipynb


+ 4 - 3
Mission210Solution.ipynb

@@ -250,6 +250,7 @@
     "def normalize_text(text):\n",
     "def normalize_text(text):\n",
     "    text = text.lower()\n",
     "    text = text.lower()\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
+    "    text = re.sub(\"\\s+\", \" \", text)\n",
     "    return text\n",
     "    return text\n",
     "\n",
     "\n",
     "def normalize_values(text):\n",
     "def normalize_values(text):\n",
@@ -570,8 +571,8 @@
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
     "def count_matches(row):\n",
     "def count_matches(row):\n",
-    "    split_answer = row[\"clean_answer\"].split(\" \")\n",
-    "    split_question = row[\"clean_question\"].split(\" \")\n",
+    "    split_answer = row[\"clean_answer\"].split()\n",
+    "    split_question = row[\"clean_question\"].split()\n",
     "    if \"the\" in split_answer:\n",
     "    if \"the\" in split_answer:\n",
     "        split_answer.remove(\"the\")\n",
     "        split_answer.remove(\"the\")\n",
     "    if len(split_answer) == 0:\n",
     "    if len(split_answer) == 0:\n",
@@ -593,7 +594,7 @@
     {
     {
      "data": {
      "data": {
       "text/plain": [
       "text/plain": [
-       "0.060493257069335914"
+       "0.059001965249777744"
       ]
       ]
      },
      },
      "execution_count": 10,
      "execution_count": 10,

+ 7 - 2
Mission327Solutions.Rmd

@@ -100,8 +100,13 @@ map2(x_var, y_var, create_scatter)
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 
 
 ```{r}
 ```{r}
-combined_survey_gather <- combined_survey %>%                         
-  gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+# combined_survey_gather <- combined_survey %>%
+#   gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+
+combined_survey_gather <- combined_survey %>%
+  pivot_longer(cols = saf_p_11:aca_tot_11,
+               names_to = "survey_question",
+               values_to = "score")
 ```
 ```
 
 
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.

+ 6 - 45
Mission469Solutions.ipynb

@@ -835,46 +835,7 @@
    "source": [
    "source": [
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "\n",
     "\n",
-    "We'll use Python's builtin [`enumerate()`](https://docs.python.org/3/library/functions.html#enumerate) function. Its utility is well understood by seeing it action."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 I\n",
-      "1 t\n",
-      "2 e\n",
-      "3 r\n",
-      "4 a\n",
-      "5 t\n",
-      "6 e\n",
-      "7  \n",
-      "8 t\n",
-      "9 h\n",
-      "10 i\n",
-      "11 s\n",
-      "12 !\n"
-     ]
-    }
-   ],
-   "source": [
-    "some_iterable = \"Iterate this!\"\n",
-    "\n",
-    "for i,c in enumerate(some_iterable):\n",
-    "    print(i,c)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In addition to the elements of `some_iterable`, `enumerate` gives us the index of each of them."
+    "We'll use _pandas_'s [`pandas.DataFrame.iterrows()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows). "
    ]
    ]
   },
   },
   {
   {
@@ -908,12 +869,12 @@
    "source": [
    "source": [
     "tag_view_count = dict()\n",
     "tag_view_count = dict()\n",
     "\n",
     "\n",
-    "for idx, tags in enumerate(questions[\"Tags\"]):\n",
-    "    for tag in tags:\n",
+    "for index, row in questions.iterrows():\n",
+    "    for tag in row['Tags']:\n",
     "        if tag in tag_view_count:\n",
     "        if tag in tag_view_count:\n",
-    "            tag_view_count[tag] += questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] += row['ViewCount']\n",
     "        else:\n",
     "        else:\n",
-    "            tag_view_count[tag] = questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] = row['ViewCount']\n",
     "            \n",
     "            \n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
@@ -2224,7 +2185,7 @@
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "\n",
     "\n",
-    "for idx, t in enumerate(quarterly[\"TotalQuestions\"]):\n",
+    "for idx, t in quarterly[\"TotalQuestions\"].iteritems():\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "xlims = ax1.get_xlim()\n",
     "xlims = ax1.get_xlim()\n",
     "\n",
     "\n",

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно