123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- ---
- title: "Solutions for Guided Project: Exploratory Visualization of Forest Fire Data"
- author: "Rose Martin"
- date: "December 4, 2018"
- output: html_document
- ---
- Load the packages we will need for the exercise:
- ```{r}
- library(readr)
- library(dplyr)
- library(ggplot2)
- library(purrr)
- ```
- Import the data file. Save it as a data frame.
- ```{r}
- ## Read the forest fire records from the CSV file and keep them as a data frame
- forest_fires <- read_csv(file = "forestfires.csv")
- ```
- Create a bar chart showing the number of forest fires occurring during each month:
- ```{r}
- ## Count the fires recorded in each month (one row per month)
- fires_by_month <- summarize(
-   group_by(forest_fires, month),
-   total_fires = n()
- )
- ## Draw the monthly counts as bars on a white panel with thin black axis lines
- ggplot(data = fires_by_month, mapping = aes(x = month, y = total_fires)) +
-   geom_bar(stat = "identity") +
-   theme(
-     panel.background = element_rect(fill = "white"),
-     axis.line = element_line(size = 0.25, colour = "black")
-   )
- ```
- Create a bar chart showing the number of forest fires occurring on each day of the week
- ```{r}
- ## Count the fires recorded on each day of the week (one row per day)
- fires_by_DOW <- summarize(
-   group_by(forest_fires, day),
-   total_fires = n()
- )
- ## Draw the per-day counts as bars on a white panel with thin black axis lines
- ggplot(data = fires_by_DOW, mapping = aes(x = day, y = total_fires)) +
-   geom_bar(stat = "identity") +
-   theme(
-     panel.background = element_rect(fill = "white"),
-     axis.line = element_line(size = 0.25, colour = "black")
-   )
- ```
- Change the data type of month and day to factor, and specify the order of the months and of the days of the week:
- ```{r}
- ## Turn month and day into factors whose levels are listed in calendar order
- forest_fires <- mutate(
-   forest_fires,
-   month = factor(
-     month,
-     levels = c("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec")
-   ),
-   day = factor(
-     day,
-     levels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")
-   )
- )
- ## Now that the months and days of the week are ordered, re-run the bar chart
- ## code above to create new bar graphs
- ```
- Write a function to create a boxplot for visualizing variable distributions by month and day of the week
- ```{r}
- ## Write the function
- ## create_boxplots(): boxplot of column `y` grouped by column `x`.
- ##   x    - name of the grouping column, as a single string
- ##   y    - name of the column whose distribution is plotted, as a single string
- ##   data - data frame containing both columns; defaults to `forest_fires`
- ## Returns the ggplot object; nothing is printed inside the function.
- create_boxplots <- function(x, y, data = forest_fires) {
-   ## aes_string() is deprecated in ggplot2, so the columns are looked up by
-   ## name through the .data pronoun instead
-   ggplot(data = data) +
-     aes(x = .data[[x]], y = .data[[y]]) +
-     geom_boxplot() +
-     ## keep the axis titles equal to the bare column names
-     labs(x = x, y = y) +
-     theme(panel.background = element_rect(fill = "white"))
- }
- ## Pick the plotting variables by column position
- x_var_month <- colnames(forest_fires)[3] ## month
- x_var_day <- colnames(forest_fires)[4] ## day
- y_var <- colnames(forest_fires)[5:12]
- ## Build one boxplot per y variable, first grouped by month ...
- month_box <- map(
-   y_var,
-   function(y_name) create_boxplots(x_var_month, y_name)
- )
- ## ... and then grouped by day of the week
- day_box <- map(
-   y_var,
-   function(y_name) create_boxplots(x_var_day, y_name)
- )
- ```
- Create scatter plots to see which variables may affect forest fire size:
- ```{r}
- ## Write the function
- ## create_scatterplots(): scatter plot of column `y` against column `x`.
- ##   x    - name of the column for the horizontal axis, as a single string
- ##   y    - name of the column for the vertical axis, as a single string
- ##   data - data frame containing both columns; defaults to `forest_fires`
- ## Returns the ggplot object; nothing is printed inside the function.
- create_scatterplots <- function(x, y, data = forest_fires) {
-   ## aes_string() is deprecated in ggplot2, so the columns are looked up by
-   ## name through the .data pronoun instead
-   ggplot(data = data) +
-     aes(x = .data[[x]], y = .data[[y]]) +
-     geom_point() +
-     ## keep the axis titles equal to the bare column names
-     labs(x = x, y = y) +
-     theme(panel.background = element_rect(fill = "white"))
- }
- ## Columns 5 to 12 supply the x variables; column 13 is the shared y variable
- x_var_scatter <- colnames(forest_fires)[5:12]
- y_var_scatter <- colnames(forest_fires)[13]
- ## Build one scatter plot per x variable, each against the same y variable
- scatters <- map(
-   x_var_scatter,
-   function(x_name) create_scatterplots(x_name, y_var_scatter)
- )
- ```
|