Queer European MD passionate about IT

Mission571Solutions.Rmd 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. ---
  2. title: 'Guided Project: New York Solar Resource Data'
  3. author: "Dataquest"
  4. date: "11/26/2020"
  5. output: html_document
  6. ---
  7. # Introduction
  8. - Title: Analyzing New York solar data.
  9. - Using APIs gives us access to an incredible amount of data only available online. In this exercise, we want to extract New York City solar data. Such data can, for example, allow us to determine on average the most productive periods of the year for solar panel deployment.
  10. # Finding the Suitable Endpoint and Parameters to Query the API
  11. ```{r}
  12. # Storing my API key in a variable
  13. the_key <- "" #TODO Store your API key here
  14. # Identifying the API URL (NREL solar resource endpoint, JSON format)
  15. url <- "https://developer.nrel.gov/api/solar/solar_resource/v1.json"
  16. # Specifying the parameters sent with the request: the API key and the latitude/longitude of the location
  17. parameters_list <- list(api_key = the_key, lat = 41, lon = -75)
  18. ```
  19. # Extracting the New York Solar Resource Data
  20. ```{r}
  21. # Loading the `httr` package
  22. library(httr)
  23. # Using the `GET()` function to request the data from the API with `url` and `parameters_list`
  24. response <- GET(url, query = parameters_list)
  25. # Tracking errors
  26. ## Displaying the status code with the `status_code()` function
  27. status <- status_code(response)
  28. status
  29. ## Displaying the API response format
  30. response_type <- http_type(response)
  31. response_type
  32. # Extracting the API response content as text, stating the encoding so `httr` does not have to guess it
  33. content <- content(response, "text", encoding = "UTF-8")
  34. # Displaying this content to check how it looks visually.
  35. print(content)
  36. ```
  37. # Parsing the JSON into an R Object
  38. ```{r}
  39. # Parsing the JSON text stored in `content` into an R object with `jsonlite::fromJSON()`
  40. json_lists <- jsonlite::fromJSON(txt = content)
  41. # Inspecting the structure of the parsed object with `str()`
  42. str(object = json_lists)
  43. ```
  44. # How to Create a Dataframe from a Complex List
  45. # Building a Dataframe from a Complex List
  46. ```{r}
  47. # Pulling the `outputs` element out of the parsed response
  48. outputs_list <- json_lists[["outputs"]]
  49. # Monthly values (`monthly`) stored under `avg_dni` in the outputs data
  50. avg_dni <- outputs_list[["avg_dni"]][["monthly"]]
  51. # Monthly values (`monthly`) stored under `avg_ghi` in the outputs data
  52. avg_ghi <- outputs_list[["avg_ghi"]][["monthly"]]
  53. # Monthly values (`monthly`) stored under `avg_lat_tilt` in the outputs data
  54. avg_lat_tilt <- outputs_list[["avg_lat_tilt"]][["monthly"]]
  55. # Assembling the monthly values into a dataframe with `tibble::tibble()`
  56. ## The `month` column holds the month abbreviations: `Jan`, `Feb`,...,`Dec`
  57. dataframe <- tibble::tibble(
  58.   month = month.abb, avg_dni = avg_dni,
  59.   avg_ghi = avg_ghi, avg_lat_tilt = avg_lat_tilt
  60. )
  61. # Displaying the dataframe
  62. dataframe
  63. ```
  64. - (Instruction 4's answer)
  65. We can see that the `avg_dni`, `avg_ghi`, and `avg_lat_tilt` columns are still lists, with each cell containing one item. For future use of this dataframe, it would likely be necessary to convert these columns to a numeric data type.
  66. # Extracting a Dataframe from a Complex List
  67. ```{r}
  68. # Pulling the `outputs` element out of the parsed response
  69. outputs_list <- json_lists[["outputs"]]
  70. # Flattening the nested outputs list into a single vector
  71. simplified_outputs_list <- unlist(outputs_list)
  72. # Reshaping the vector into a matrix of 13 rows (the annual value followed by the 12 monthly values)
  73. data_matrix <- matrix(simplified_outputs_list, nrow = 13)
  74. # Dropping the first row, which holds the annual values
  75. data_matrix <- data_matrix[-1, ]
  76. # Turning the matrix into a dataframe with `as.data.frame()`
  77. another_dataframe <- as.data.frame(x = data_matrix)
  78. # Displaying the dataframe
  79. another_dataframe
  80. ```
  81. - (Instruction 6's answer)
  82. We can see that all the columns are numeric. However, we haven't appended the `month` column yet.
  83. # Putting It All Together
  84. ```{r}
  85. library(httr)
  86. library(dplyr)
  87. the_key <- "" #TODO Store your API key here
  88. # Creating the custom `nrel_api_json_get_df()` function, inspired by what we did in the previous sections
  89. ## The function has two parameters and returns a tibble with one row per month
  90. ### The `endpoint` parameter is the endpoint path, appended to "https://developer.nrel.gov"
  91. ### The `queries` parameter is the list of API request parameters
  92. nrel_api_json_get_df <- function(endpoint, queries = list()) {
  93.   ## Preparing the URL
  94.   url <- modify_url("https://developer.nrel.gov", path = endpoint)
  95.   ## Querying the API
  96.   response <- GET(url, query = queries)
  97.   ## Tracking errors: the HTTP status is reported in the error message itself
  98.   if (http_error(response)) {
  99.     stop("Something went wrong. ", http_status(response)$message,
  100.          call. = FALSE)
  101.   }
  102.   ## Checking the response format before parsing it
  103.   if (http_type(response) != "application/json") {
  104.     stop("API did not return json", call. = FALSE)
  105.   }
  106.   ## Extracting content as text, stating the encoding so `httr` does not have to guess it
  107.   json_text <- content(response, "text", encoding = "UTF-8")
  108.   ## Converting content into a dataframe: month abbreviations plus the three monthly series as numeric columns
  109.   outputs <- jsonlite::fromJSON(json_text)$outputs
  110.   dataframe <- tibble::tibble("month" = month.abb,
  111.                               "avg_dni" = as.numeric(outputs$avg_dni$monthly),
  112.                               "avg_ghi" = as.numeric(outputs$avg_ghi$monthly),
  113.                               "avg_lat_tilt" = as.numeric(outputs$avg_lat_tilt$monthly))
  114.   ## Returning the dataframe
  115.   dataframe
  116. }
  117. # Calling the custom `nrel_api_json_get_df()` function to get the solar resource data as a dataframe
  118. ## The `endpoint` argument is the solar resource path, `"api/solar/solar_resource/v1.json"`
  119. ## The `queries` argument is the `parameters_list` variable
  120. solar_resource_df <- nrel_api_json_get_df(endpoint = "api/solar/solar_resource/v1.json", queries = parameters_list)
  121. # Printing the output dataframe
  122. solar_resource_df
  123. ```
  124. # Visualizing New York City Solar Resource Data
  125. ```{r}
  126. # Loading the `ggplot2` and `dplyr` packages
  127. library(ggplot2)
  128. library(dplyr)
  129. # Plotting the `avg_dni` value for each month as a line with points, using `ggplot()`
  130. ggplot(solar_resource_df, aes(month, avg_dni, group = 1)) +
  131.   geom_line() +
  132.   geom_point() +
  133.   theme_bw()
  134. # Converting the `month` column into a factor whose levels follow `month.abb`
  135. solar_resource_df <- mutate(
  136.   solar_resource_df,
  137.   month = factor(month, levels = month.abb)
  138. )
  139. # Replotting the `avg_dni` value for each month
  140. ggplot(solar_resource_df, aes(month, avg_dni, group = 1)) +
  141.   geom_line() +
  142.   geom_point() +
  143.   theme_bw()
  144. ```
  145. - (Instruction 5's answer)
  146. The x-axis of the first plot is ordered alphabetically, while that of the second plot is ordered chronologically from January to December.
  147. Converting `month` into a factor with explicit levels lets us order the labels in the plot as we wish.