Queer European MD passionate about IT
Просмотр исходного кода

Merge branch 'master' of github.com:dataquestio/solutions

Christian Pascual 5 лет назад
Родитель
Сommit
64a45b1d49
5 измененных файлов с 142 добавлено и 469 удалено
  1. 10 6
      Mission146Solutions.ipynb
  2. 115 413
      Mission155Solutions.ipynb
  3. 4 3
      Mission210Solution.ipynb
  4. 7 2
      Mission327Solutions.Rmd
  5. 6 45
      Mission469Solutions.ipynb

Разница между файлами не показана из-за своего большого размера
+ 10 - 6
Mission146Solutions.ipynb


Разница между файлами не показана из-за своего большого размера
+ 115 - 413
Mission155Solutions.ipynb


+ 4 - 3
Mission210Solution.ipynb

@@ -250,6 +250,7 @@
     "def normalize_text(text):\n",
     "def normalize_text(text):\n",
     "    text = text.lower()\n",
     "    text = text.lower()\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
     "    text = re.sub(\"[^A-Za-z0-9\\s]\", \"\", text)\n",
+    "    text = re.sub(\"\\s+\", \" \", text)\n",
     "    return text\n",
     "    return text\n",
     "\n",
     "\n",
     "def normalize_values(text):\n",
     "def normalize_values(text):\n",
@@ -570,8 +571,8 @@
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
     "def count_matches(row):\n",
     "def count_matches(row):\n",
-    "    split_answer = row[\"clean_answer\"].split(\" \")\n",
-    "    split_question = row[\"clean_question\"].split(\" \")\n",
+    "    split_answer = row[\"clean_answer\"].split()\n",
+    "    split_question = row[\"clean_question\"].split()\n",
     "    if \"the\" in split_answer:\n",
     "    if \"the\" in split_answer:\n",
     "        split_answer.remove(\"the\")\n",
     "        split_answer.remove(\"the\")\n",
     "    if len(split_answer) == 0:\n",
     "    if len(split_answer) == 0:\n",
@@ -593,7 +594,7 @@
     {
     {
      "data": {
      "data": {
       "text/plain": [
       "text/plain": [
-       "0.060493257069335914"
+       "0.059001965249777744"
       ]
       ]
      },
      },
      "execution_count": 10,
      "execution_count": 10,

+ 7 - 2
Mission327Solutions.Rmd

@@ -100,8 +100,13 @@ map2(x_var, y_var, create_scatter)
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 Reshape the data so that you can investigate differences in student, parent, and teacher responses to survey questions.
 
 
 ```{r}
 ```{r}
-combined_survey_gather <- combined_survey %>%                         
-  gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+# combined_survey_gather <- combined_survey %>%
+#   gather(key = "survey_question", value = score, saf_p_11:aca_tot_11)
+
+combined_survey_gather <- combined_survey %>%
+  pivot_longer(cols = saf_p_11:aca_tot_11,
+               names_to = "survey_question",
+               values_to = "score")
 ```
 ```
 
 
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.
 Use `str_sub()` to create new variables, `response_type` and `question`, from the `survey_question` variable.

+ 6 - 45
Mission469Solutions.ipynb

@@ -835,46 +835,7 @@
    "source": [
    "source": [
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "Some tags are very, very broad and are unlikely to be useful; e.g.: `python`, `dataset`, `r`. Before we investigate the tags a little deeper, let's repeat the same process for views.\n",
     "\n",
     "\n",
-    "We'll use Python's builtin [`enumerate()`](https://docs.python.org/3/library/functions.html#enumerate) function. Its utility is well understood by seeing it action."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 I\n",
-      "1 t\n",
-      "2 e\n",
-      "3 r\n",
-      "4 a\n",
-      "5 t\n",
-      "6 e\n",
-      "7  \n",
-      "8 t\n",
-      "9 h\n",
-      "10 i\n",
-      "11 s\n",
-      "12 !\n"
-     ]
-    }
-   ],
-   "source": [
-    "some_iterable = \"Iterate this!\"\n",
-    "\n",
-    "for i,c in enumerate(some_iterable):\n",
-    "    print(i,c)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In addition to the elements of `some_iterable`, `enumerate` gives us the index of each of them."
+    "We'll use _pandas_'s [`pandas.DataFrame.iterrows()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows). "
    ]
    ]
   },
   },
   {
   {
@@ -908,12 +869,12 @@
    "source": [
    "source": [
     "tag_view_count = dict()\n",
     "tag_view_count = dict()\n",
     "\n",
     "\n",
-    "for idx, tags in enumerate(questions[\"Tags\"]):\n",
-    "    for tag in tags:\n",
+    "for index, row in questions.iterrows():\n",
+    "    for tag in row['Tags']:\n",
     "        if tag in tag_view_count:\n",
     "        if tag in tag_view_count:\n",
-    "            tag_view_count[tag] += questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] += row['ViewCount']\n",
     "        else:\n",
     "        else:\n",
-    "            tag_view_count[tag] = questions[\"ViewCount\"].iloc[idx]\n",
+    "            tag_view_count[tag] = row['ViewCount']\n",
     "            \n",
     "            \n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count = pd.DataFrame.from_dict(tag_view_count, orient=\"index\")\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
     "tag_view_count.rename(columns={0: \"ViewCount\"}, inplace=True)\n",
@@ -2224,7 +2185,7 @@
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "ax2 = quarterly.plot(x=\"Quarter\", y=\"TotalQuestions\",\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "                     kind=\"bar\", ax=ax1, secondary_y=True, alpha=0.7, rot=45)\n",
     "\n",
     "\n",
-    "for idx, t in enumerate(quarterly[\"TotalQuestions\"]):\n",
+    "for idx, t in quarterly[\"TotalQuestions\"].iteritems():\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "    ax2.text(idx, t, str(t), ha=\"center\", va=\"bottom\")\n",
     "xlims = ax1.get_xlim()\n",
     "xlims = ax1.get_xlim()\n",
     "\n",
     "\n",

Некоторые файлы не были показаны из-за большого количества измененных файлов