Queer European MD passionate about IT
Przeglądaj źródła

2777 solutions rewrite

Christian Pascual 4 lat temu
rodzic
commit
370f2e7d3f
1 zmienionych plików z 88 dodań i 56 usunięć
  1. 88 56
      Mission277Solutions.Rmd

+ 88 - 56
Mission277Solutions.Rmd

@@ -7,10 +7,7 @@ output: html_document
 
 Load the packages we will need for the exercise: 
 ```{r}
-library(readr)
-library(dplyr)
-library(ggplot2)
-library(purrr)
+library(tidyverse)
 ```
 
 Import the data file. Save it as a data frame. 
@@ -26,79 +23,114 @@ fires_by_month <- forest_fires %>%
   group_by(month) %>%
   summarize(total_fires = n())
 
-ggplot(data = fires_by_month,
-  aes(x = month, y = total_fires)) +
-  geom_bar(stat = "identity")  +
-  theme(panel.background = element_rect(fill = "white"), 
-        axis.line = element_line(size = 0.25, 
-                                 colour = "black"))
+fires_by_month %>% 
+  ggplot(aes(x = month, y = total_fires)) +
+  geom_col()
 ```
 
 Create a bar chart showing the number of forest fires occurring on each day of the week
 
 ```{r}
-fires_by_DOW <- forest_fires %>%
+fires_by_dow <- forest_fires %>%
   group_by(day) %>%
   summarize(total_fires = n())
 
-ggplot(data = fires_by_DOW,
-  aes(x = day, y = total_fires)) +
-  geom_bar(stat = "identity") +
-  theme(panel.background = element_rect(fill = "white"), 
-        axis.line = element_line(size = 0.25, 
-                                 colour = "black")) 
+fires_by_dow %>% 
+  ggplot(aes(x = day, y = total_fires)) +
+  geom_col()
 ```
 
-Change the data type of month to factor and specify the order of months
+Adding another column to help us order the months
 
 ```{r}
-forest_fires <- forest_fires %>%
-  mutate(month = factor(month, levels = c("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec")), 
-         day = factor(day, levels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")))
+fires_by_month %>% 
+  mutate(
+    month_num = case_when(
+      month == "jan" ~ 1,
+      month == "feb" ~ 2,
+      month == "mar" ~ 3,
+      month == "apr" ~ 4,
+      month == "may" ~ 5,
+      month == "jun" ~ 6,
+      month == "jul" ~ 7,
+      month == "aug" ~ 8,
+      month == "sep" ~ 9,
+      month == "oct" ~ 10,
+      month == "nov" ~ 11,
+      month == "dec" ~ 12,
+    )
+  ) %>% 
+  ggplot(aes(x = month_num, y = total_fires)) +
+  geom_col() 
+```
 
-## once you have reordered the months and days of the week, you can re-run the bar chart code above
-# to create new bar graphs
+```{r}
+fires_by_dow %>% 
+  mutate(
+    day_num = case_when(
+      day == "sun" ~ 1,
+      day == "mon" ~ 2,
+      day == "tue" ~ 3,
+      day == "wed" ~ 4,
+      day == "thu" ~ 5,
+      day == "fri" ~ 6,
+      day == "sat" ~ 7,
+    )
+  ) %>% 
+  ggplot(aes(x = day_num, y = total_fires)) +
+  geom_col() +
+  scale_x_continuous( # day_num is numeric: label ticks 1-7 with day names
+    breaks = 1:7, labels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")
+  )
 ```
 
 Write a function to create a boxplot for visualizing variable distributions by month and day of the week
 
-```{r}
 
-## Write the function
-create_boxplots <- function(x, y) {
-  ggplot(data = forest_fires, 
-    aes_string(x = x, y = y)) +
-    geom_boxplot() +
-    theme(panel.background = element_rect(fill = "white"))
-}
-
-## Assign x and y variable names 
-x_var_month <- names(forest_fires)[3] ## month
-x_var_day <- names(forest_fires)[4] ## day
-y_var <- names(forest_fires)[5:12]
-
-## use the map() function to apply the function to the variables of interest
-month_box <- map2(x_var_month, y_var, create_boxplots) ## visualize variables by month
-day_box <- map2(x_var_day, y_var, create_boxplots) ## visualize variables by day
+```{r}
+forest_fires_long <- forest_fires %>% 
+  mutate(
+    month_num = case_when(
+      month == "jan" ~ 1,
+      month == "feb" ~ 2,
+      month == "mar" ~ 3,
+      month == "apr" ~ 4,
+      month == "may" ~ 5,
+      month == "jun" ~ 6,
+      month == "jul" ~ 7,
+      month == "aug" ~ 8,
+      month == "sep" ~ 9,
+      month == "oct" ~ 10,
+      month == "nov" ~ 11,
+      month == "dec" ~ 12,
+    )
+  ) %>% 
+  pivot_longer(
+    cols = c("FFMC", "DMC", "DC", 
+             "ISI", "temp", "RH", "wind", "rain"),
+    names_to = "data_col",
+    values_to = "value"
+  )
+
+forest_fires_long %>% 
+  ggplot(aes(x = month, y = value)) +
+  geom_boxplot() +
+  facet_grid(rows = vars(data_col), scales = "free_y")
 ```
 
-
 Create scatter plots to see which variables may affect forest fire size: 
 
 ```{r}
-
-## write the function 
-create_scatterplots = function(x, y) {
-  ggplot(data = forest_fires, 
-    aes_string(x = x, y = y)) +
-    geom_point() +
-    theme(panel.background = element_rect(fill = "white"))
-}
-
-## Assign x and y variable names 
-x_var_scatter <- names(forest_fires)[5:12]
-y_var_scatter <- names(forest_fires)[13]
-
-## use the map() function to apply the function to the variables of interest
-scatters <- map2(x_var_scatter, y_var_scatter, create_scatterplots)
+forest_fires_long %>% 
+  ggplot(aes(x = value, y = area)) +
+  geom_point() +
+  facet_wrap(vars(data_col), scales = "free_x")
 ```
+
+```{r}
+forest_fires_long %>% 
+  filter(area < 300) %>% 
+  ggplot(aes(x = value, y = area)) +
+  geom_point() +
+  facet_wrap(vars(data_col), scales = "free_x")
+```