|
@@ -7,10 +7,7 @@ output: html_document
|
|
|
|
|
|
Load the packages we will need for the exercise:
|
|
|
```{r}
|
|
|
-library(readr)
|
|
|
-library(dplyr)
|
|
|
-library(ggplot2)
|
|
|
-library(purrr)
|
|
|
+library(tidyverse)
|
|
|
```
|
|
|
|
|
|
Import the data file. Save it as a data frame.
|
|
@@ -26,79 +23,114 @@ fires_by_month <- forest_fires %>%
|
|
|
group_by(month) %>%
|
|
|
summarize(total_fires = n())
|
|
|
|
|
|
-ggplot(data = fires_by_month,
|
|
|
- aes(x = month, y = total_fires)) +
|
|
|
- geom_bar(stat = "identity") +
|
|
|
- theme(panel.background = element_rect(fill = "white"),
|
|
|
- axis.line = element_line(size = 0.25,
|
|
|
- colour = "black"))
|
|
|
+# Bar chart of the number of fires recorded in each month.
+ggplot(
+  data = fires_by_month,
+  mapping = aes(x = month, y = total_fires)
+) +
+  geom_col()
|
|
|
```
|
|
|
|
|
|
Create a bar chart showing the number of forest fires occurring on each day of the week
|
|
|
|
|
|
```{r}
|
|
|
-fires_by_DOW <- forest_fires %>%
|
|
|
+fires_by_dow <- forest_fires %>%
|
|
|
group_by(day) %>%
|
|
|
summarize(total_fires = n())
|
|
|
|
|
|
-ggplot(data = fires_by_DOW,
|
|
|
- aes(x = day, y = total_fires)) +
|
|
|
- geom_bar(stat = "identity") +
|
|
|
- theme(panel.background = element_rect(fill = "white"),
|
|
|
- axis.line = element_line(size = 0.25,
|
|
|
- colour = "black"))
|
|
|
+# Bar chart of the number of fires recorded on each day of the week.
+ggplot(
+  data = fires_by_dow,
+  mapping = aes(x = day, y = total_fires)
+) +
+  geom_col()
|
|
|
```
|
|
|
|
|
|
-Change the data type of month to factor and specify the order of months
|
|
|
+Add a numeric column to help us order the months and the days of the week:
|
|
|
|
|
|
```{r}
|
|
|
-forest_fires <- forest_fires %>%
|
|
|
- mutate(month = factor(month, levels = c("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec")),
|
|
|
- day = factor(day, levels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")))
|
|
|
+# Plot monthly fire counts against a numeric month index (jan = 1, ...,
+# dec = 12) so the bars follow calendar order instead of alphabetical order.
+fires_by_month %>%
+  mutate(
+    # Look up each lowercase abbreviation's position in the calendar;
+    # unrecognised values become NA. as.numeric() keeps the column a double.
+    month_num = as.numeric(match(month, tolower(month.abb)))
+  ) %>%
+  ggplot(mapping = aes(x = month_num, y = total_fires)) +
+  geom_col()
|
|
|
+```
|
|
|
|
|
|
-## once you have reordered the months and days of the week, you can re-run the bar chart code above
|
|
|
-# to create new bar graphs
|
|
|
+```{r}
|
|
|
+# Plot fire counts by day of the week in calendar order (sun = 1, ..., sat = 7).
+# day_num is numeric, so the axis needs a continuous scale; the breaks and
+# labels map each position back to its day abbreviation.
+fires_by_dow %>%
+  mutate(
+    # Position of each day abbreviation in the week; unrecognised values
+    # become NA.
+    day_num = match(day, c("sun", "mon", "tue", "wed", "thu", "fri", "sat"))
+  ) %>%
+  ggplot(aes(x = day_num, y = total_fires)) +
+  geom_col() +
+  scale_x_continuous(
+    breaks = 1:7,
+    labels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")
+  )
|
|
|
```
|
|
|
|
|
|
Write a function to create a boxplot for visualizing variable distributions by month and day of the week
|
|
|
|
|
|
-```{r}
|
|
|
|
|
|
-## Write the function
|
|
|
-create_boxplots <- function(x, y) {
|
|
|
- ggplot(data = forest_fires,
|
|
|
- aes_string(x = x, y = y)) +
|
|
|
- geom_boxplot() +
|
|
|
- theme(panel.background = element_rect(fill = "white"))
|
|
|
-}
|
|
|
-
|
|
|
-## Assign x and y variable names
|
|
|
-x_var_month <- names(forest_fires)[3] ## month
|
|
|
-x_var_day <- names(forest_fires)[4] ## day
|
|
|
-y_var <- names(forest_fires)[5:12]
|
|
|
-
|
|
|
-## use the map() function to apply the function to the variables of interest
|
|
|
-month_box <- map2(x_var_month, y_var, create_boxplots) ## visualize variables by month
|
|
|
-day_box <- map2(x_var_day, y_var, create_boxplots) ## visualize variables by day
|
|
|
+```{r}
|
|
|
+# Long-format copy of forest_fires: add a numeric month index, then stack the
+# eight measurement columns into (data_col, value) pairs so that one plot can
+# be faceted by variable.
+forest_fires_long <- forest_fires %>%
+  mutate(
+    # jan = 1, ..., dec = 12; unrecognised values become NA. as.numeric()
+    # keeps the column a double.
+    month_num = as.numeric(match(month, tolower(month.abb)))
+  ) %>%
+  pivot_longer(
+    cols = c(
+      "FFMC", "DMC", "DC", "ISI",
+      "temp", "RH", "wind", "rain"
+    ),
+    names_to = "data_col",
+    values_to = "value"
+  )
|
|
|
+
|
|
|
+# Boxplots of every measured variable by month, one facet row per variable.
+# The x axis is ordered by month_num so months appear in calendar order
+# rather than alphabetically; each facet keeps its own y scale.
+forest_fires_long %>%
+  ggplot(aes(x = reorder(month, month_num), y = value)) +
+  geom_boxplot() +
+  facet_grid(rows = vars(data_col), scales = "free_y") +
+  # reorder() would otherwise leak into the axis title.
+  labs(x = "month")
|
|
|
```
|
|
|
|
|
|
-
|
|
|
Create scatter plots to see which variables may affect forest fire size:
|
|
|
|
|
|
```{r}
|
|
|
-
|
|
|
-## write the function
|
|
|
-create_scatterplots = function(x, y) {
|
|
|
- ggplot(data = forest_fires,
|
|
|
- aes_string(x = x, y = y)) +
|
|
|
- geom_point() +
|
|
|
- theme(panel.background = element_rect(fill = "white"))
|
|
|
-}
|
|
|
-
|
|
|
-## Assign x and y variable names
|
|
|
-x_var_scatter <- names(forest_fires)[5:12]
|
|
|
-y_var_scatter <- names(forest_fires)[13]
|
|
|
-
|
|
|
-## use the map() function to apply the function to the variables of interest
|
|
|
-scatters <- map2(x_var_scatter, y_var_scatter, create_scatterplots)
|
|
|
+# Scatter plots of burned area against each measured variable, one panel per
+# variable with an independent x scale.
+ggplot(
+  data = forest_fires_long,
+  mapping = aes(x = value, y = area)
+) +
+  geom_point() +
+  facet_wrap(~ data_col, scales = "free_x")
|
|
|
```
|
|
|
+
|
|
|
+```{r}
|
|
|
+# Same scatter plots, restricted to rows with area below 300 so the largest
+# fires do not compress the rest of the points.
+ggplot(
+  data = filter(forest_fires_long, area < 300),
+  mapping = aes(x = value, y = area)
+) +
+  geom_point() +
+  facet_wrap(~ data_col, scales = "free_x")
|
|
|
+```
|