Queer European MD passionate about IT

Mission277Solutions.Rmd 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. ---
  2. title: "Solutions for Guided Project: Exploratory Visualization of Forest Fire Data"
  3. author: "Rose Martin"
  4. date: "December 4, 2018"
  5. output: html_document
  6. ---
  7. Load the packages we will need for the exercise:
  8. ```{r}
  9. library(tidyverse)
  10. ```
  11. Import the data file. Save it as a data frame.
  12. ```{r}
  13. forest_fires <- read_csv(file = "forestfires.csv") # relative path
  14. ```
  15. Create a bar chart showing the number of forest fires occurring during each month
  16. ```{r}
  17. # Count the rows for each month, then draw one bar per month.
  18. fires_by_month <- summarize(
  19.   group_by(forest_fires, month), total_fires = n()
  20. )
  21. ggplot(fires_by_month, aes(x = month, y = total_fires)) +
  22.   geom_col()
  23. ```
  24. Create a bar chart showing the number of forest fires occurring on each day of the week
  25. ```{r}
  26. # Count the rows for each weekday, then draw one bar per weekday.
  27. fires_by_dow <- summarize(
  28.   group_by(forest_fires, day), total_fires = n()
  29. )
  30. ggplot(fires_by_dow, aes(x = day, y = total_fires)) +
  31.   geom_col()
  32. ```
  33. Adding another column to help us order the months
  34. ```{r}
  35. # Bar chart of fires per month with the x axis in calendar order.
  36. # Plotting `month` directly sorts the abbreviations alphabetically,
  37. # so each one is first translated to its position in the year
  38. # (1 = jan, ..., 12 = dec). An abbreviation missing from the lookup
  39. # vector yields NA. as.numeric() keeps month_num a double rather than
  40. # the integer that match() returns.
  41. ggplot(
  42.   data = mutate(
  43.     fires_by_month,
  44.     month_num = as.numeric(match(
  45.       month,
  46.       c(
  47.         "jan", "feb", "mar", "apr", "may", "jun",
  48.         "jul", "aug", "sep", "oct", "nov", "dec"
  49.       )
  50.     ))
  51.   ),
  52.   mapping = aes(x = month_num, y = total_fires)
  53. ) + geom_col()
  54. ```
  55. ```{r}
  56. # Bar chart of fires per weekday in sun -> sat order. day_num is
  57. # numeric, so the x axis uses a continuous scale with one break per
  58. # day, each labelled with the matching day abbreviation.
  59. fires_by_dow %>%
  60.   mutate(
  61.     day_num = case_when(
  62.       day == "sun" ~ 1, day == "mon" ~ 2, day == "tue" ~ 3,
  63.       day == "wed" ~ 4, day == "thu" ~ 5, day == "fri" ~ 6,
  64.       day == "sat" ~ 7
  65.     )
  66.   ) %>%
  67.   ggplot(aes(x = day_num, y = total_fires)) +
  68.   geom_col() +
  69.   scale_x_continuous(
  70.     breaks = 1:7,
  71.     labels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")
  72.   )
  73. ```
  74. Write a function to create a boxplot for visualizing variable distributions by month and day of the week
  75. ```{r}
  76. # Reshape to long form: each original row is repeated once per
  77. # measured variable, with the variable name in data_col and its
  78. # reading in value. month_num (1 = jan, ..., 12 = dec) records the
  79. # calendar position of each month abbreviation.
  80. forest_fires_long <- forest_fires %>%
  81.   mutate(
  82.     month_num = case_when(
  83.       month == "jan" ~ 1, month == "feb" ~ 2, month == "mar" ~ 3,
  84.       month == "apr" ~ 4, month == "may" ~ 5, month == "jun" ~ 6,
  85.       month == "jul" ~ 7, month == "aug" ~ 8, month == "sep" ~ 9,
  86.       month == "oct" ~ 10, month == "nov" ~ 11, month == "dec" ~ 12
  87.     )
  88.   ) %>%
  89.   pivot_longer(
  90.     cols = c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain"),
  91.     names_to = "data_col",
  92.     values_to = "value"
  93.   )
  94. # One boxplot panel per measured variable, each with its own y range.
  95. # Plotting `month` directly sorts the abbreviations alphabetically,
  96. # so reorder() ranks them by month_num and the x axis reads
  97. # jan -> dec; labs() restores a plain axis title.
  98. forest_fires_long %>%
  99.   ggplot(aes(x = reorder(month, month_num), y = value)) +
  100.   geom_boxplot() +
  101.   facet_grid(rows = vars(data_col), scales = "free_y") +
  102.   labs(x = "month")
  103. ```
  104. Create scatter plots to see which variables may affect forest fire size:
  105. ```{r}
  106. # Fire area against each measured variable, one panel per variable.
  107. ggplot(forest_fires_long, aes(x = value, y = area)) +
  108.   geom_point() +
  109.   facet_wrap(vars(data_col), scales = "free_x")
  110. ```
  111. ```{r}
  112. # Same scatter panels, keeping only rows whose area is below 300.
  113. ggplot(
  114.   filter(forest_fires_long, area < 300), aes(x = value, y = area)
  115. ) +
  116.   geom_point() + facet_wrap(vars(data_col), scales = "free_x")
  117. ```