Queer European MD passionate about IT
Jelajahi Sumber

Merge branch 'master' of github.com:dataquestio/solutions

Christian Pascual 4 tahun lalu
induk
melakukan
64a45b1d49

File diff ditekan karena terlalu besar
+ 10 - 6
Mission146Solutions.ipynb


File diff ditekan karena terlalu besar
+ 115 - 413
Mission155Solutions.ipynb


+ 4 - 3
Mission210Solution.ipynb

@@ -250,6 +250,7 @@
     "def normalize_text(text):\n",
     "def normalize_text(text):\n",
     "    text = text.lower()\n",
     "    text = text.lower()\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
+    "    text = re.sub(\"\\s+\", \" \", text)\n",
     "    return text\n",
     "    return text\n",
     "\n",
     "\n",
     "def normalize_values(text):\n",
     "def normalize_values(text):\n",
@@ -570,8 +571,8 @@
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
     "def count_matches(row):\n",
     "def count_matches(row):\n",
-    "    split_answer = row[\"clean_answer\"].split(\" \")\n",
-    "    split_question = row[\"clean_question\"].split(\" \")\n",
+    "    split_answer = row[\"clean_answer\"].split()\n",
+    "    split_question = row[\"clean_question\"].split()\n",
     "    if \"the\" in split_answer:\n",
     "    if \"the\" in split_answer:\n",
     "        split_answer.remove(\"the\")\n",
     "        split_answer.remove(\"the\")\n",
     "    if len(split_answer) == 0:\n",
     "    if len(split_answer) == 0:\n",
@@ -593,7 +594,7 @@
     {
     {
      "data": {
      "data": {
       "text/plain": [
       "text/plain": [
-       "0.060493257069335914"
+       "0.059001965249777744"
       ]
       ]
      },
      },
      "execution_count": 10,
      "execution_count": 10,

+ 7 - 2
Mission327Solutions.Rmd

@@ -100,8 +100,13 @@ map2(x_var, y_var, create_scatter)
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 
 
 ```{r}
 ```{r}
-combined_survey_gather <- combined_survey %>%                         
-  gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+# combined_survey_gather <- combined_survey %>%
+#   gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+
+combined_survey_gather <- combined_survey %>%
+  pivot_longer(cols = saf_p_11:aca_tot_11,
+               names_to = "survey_question",
+               values_to = "score")
 ```
 ```
 
 
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.

+ 6 - 45
Mission469Solutions.ipynb

@@ -835,46 +835,7 @@
    "source": [
    "source": [
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "\n",
     "\n",
-    "We'll use Python's builtin [`enumerate()`](https://docs.python.org/3/library/functions.html#enumerate) function. Its utility is well understood by seeing it action."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 I\n",
-      "1 t\n",
-      "2 e\n",
-      "3 r\n",
-      "4 a\n",
-      "5 t\n",
-      "6 e\n",
-      "7  \n",
-      "8 t\n",
-      "9 h\n",
-      "10 i\n",
-      "11 s\n",
-      "12 !\n"
-     ]
-    }
-   ],
-   "source": [
-    "some_iterable = \"Iterate this!\"\n",
-    "\n",
-    "for i,c in enumerate(some_iterable):\n",
-    "    print(i,c)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In addition to the elements of `some_iterable`, `enumerate` gives us the index of each of them."
+    "We'll use _pandas_'s [`pandas.DataFrame.iterrows()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows). "
    ]
    ]
   },
   },
   {
   {
@@ -908,12 +869,12 @@
    "source": [
    "source": [
     "tag_view_count = dict()\n",
     "tag_view_count = dict()\n",
     "\n",
     "\n",
-    "for idx, tags in enumerate(questions[\"Tags\"]):\n",
-    "    for tag in tags:\n",
+    "for index, row in questions.iterrows():\n",
+    "    for tag in row['Tags']:\n",
     "        if tag in tag_view_count:\n",
     "        if tag in tag_view_count:\n",
-    "            tag_view_count[tag] += questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] += row['ViewCount']\n",
     "        else:\n",
     "        else:\n",
-    "            tag_view_count[tag] = questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] = row['ViewCount']\n",
     "            \n",
     "            \n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
@@ -2224,7 +2185,7 @@
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "\n",
     "\n",
-    "for idx, t in enumerate(quarterly[\"TotalQuestions\"]):\n",
+    "for idx, t in quarterly[\"TotalQuestions\"].iteritems():\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "xlims = ax1.get_xlim()\n",
     "xlims = ax1.get_xlim()\n",
     "\n",
     "\n",

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini