---
title: 'Guided Project: New York Solar Resource Data'
author: "Dataquest"
date: "11/26/2020"
output: html_document
---

# Finding the Suitable Endpoint and Parameters to Query the API

```{r}
# Storing my api key in a variable
the_key <- "" # TODO Store your API key here

# Identifying the API URL
url <- "https://developer.nrel.gov/api/solar/solar_resource/v1.json"

# Specifying the necessary parameters to request the New York City solar data
parameters_list <- list(api_key = the_key, lat = 41, lon = -75)
```

# Extracting the New York Solar Resource Data

```{r}
# Loading the `httr` package
library(httr)

# Using the `GET()` function to request the data from the API with `url` and
# `parameters_list`
response <- GET(url, query = parameters_list)

# Tracking errors
## Displaying the status code with the `status_code()` function
print(status_code(response))

## Displaying the API response format
print(http_type(response))

# Extracting the API response content as text
## The encoding is given explicitly so that `content()` does not have to guess
json_text <- content(response, as = "text", encoding = "UTF-8")

# Displaying this content to check how it looks visually.
print(json_text)
```

# Parsing the JSON into R Object

```{r}
# Parsing the `json_text` to an R object using the `jsonlite::fromJSON()`
# function
json_lists <- jsonlite::fromJSON(json_text)

# Displaying the structure of the R object using the `str()` function
str(json_lists)
```

# How to Create a Dataframe from a Complex List

# Building Dataframe from a Complex List

```{r}
# Extracting the outputs data
outputs_list <- json_lists$outputs

# Extracting the monthly vector (`monthly`) from the (`avg_dni`) list in the
# outputs data
avg_dni <- outputs_list$avg_dni$monthly

# Extracting the monthly vector (`monthly`) from the (`avg_ghi`) list in the
# outputs data
avg_ghi <- outputs_list$avg_ghi$monthly

# Extracting the monthly vector (`monthly`) from the (`avg_lat_tilt`) list in
# the outputs data
avg_lat_tilt <- outputs_list$avg_lat_tilt$monthly

# Combining the monthly vectors into a dataframe using the `tibble::tibble()`
# function
## Adding the `month` column containing month abbreviations:
## `Jan`, `Feb`,...,`Dec`
dataframe <- tibble::tibble(
  "avg_dni" = avg_dni,
  "avg_ghi" = avg_ghi,
  "avg_lat_tilt" = avg_lat_tilt,
  "month" = month.abb
)

# Displaying the dataframe
dataframe
```

We can see that all the columns are still lists containing one item. For future use of this dataframe, it would probably be necessary to convert these columns to numeric.
# Extracting Dataframe from a Complex List:

```{r}
# Extracting the outputs list
outputs_list <- json_lists$outputs

# Simplifying the outputs list
simplified_outputs_list <- unlist(outputs_list)

# Restructuring the simplified list into a matrix of 13 rows (the annual value
# and 12 months values)
data_matrix <- matrix(data = simplified_outputs_list, nrow = 13)

# Removing the annual values from the data matrix
data_matrix <- data_matrix[-1, ]

# Converting the matrix into a dataframe using the `as.data.frame()` function
another_dataframe <- as.data.frame(data_matrix)

# Displaying the dataframe
another_dataframe
```

# Putting all together

```{r}
library(httr)
library(dplyr)

the_key <- "" # TODO Store your API key here

# Creating the custom `nrel_api_json_get_df()` function, inspired by what we
# did in the previous missions
## The function has two parameters
### The `endpoint` parameter represents the endpoint we need
### The `queries` parameter represents the list of API request parameters.
## It returns a tibble with the numeric columns `avg_dni`, `avg_ghi` and
## `avg_lat_tilt` plus a `month` column of month abbreviations.
## It stops with an error when the request fails, when the response is not
## JSON, or when a metric has no monthly values in the response.
nrel_api_json_get_df <- function(endpoint, queries = list()) {
  ## Preparing the URL
  url <- modify_url("https://developer.nrel.gov", path = endpoint)

  ## Querying the API
  response <- GET(url, query = queries)

  ## Tracking errors
  ### The HTTP status message is carried by the error itself instead of
  ### being printed separately before stopping
  if (http_error(response)) {
    stop(
      "Something went wrong. ", http_status(response)$message,
      call. = FALSE
    )
  }

  if (http_type(response) != "application/json") {
    stop("API did not return json", call. = FALSE)
  }

  ## Extracting content
  ### The encoding is given explicitly so that `content()` does not guess
  json_text <- content(response, as = "text", encoding = "UTF-8")

  ## Converting content into Dataframe
  outputs <- jsonlite::fromJSON(json_text)$outputs

  ### Returns the 12 monthly values of one metric as a numeric vector, and
  ### fails with a readable message when they are absent from the response
  monthly_values <- function(metric) {
    entry <- if (is.list(outputs)) outputs[[metric]] else NULL
    monthly <- if (is.list(entry)) entry[["monthly"]] else NULL
    if (length(monthly) != length(month.abb)) {
      stop(
        "API response does not contain 12 monthly values for `", metric, "`.",
        call. = FALSE
      )
    }
    as.numeric(monthly)
  }

  ## Returning the dataframe
  tibble::tibble(
    "avg_dni" = monthly_values("avg_dni"),
    "avg_ghi" = monthly_values("avg_ghi"),
    "avg_lat_tilt" = monthly_values("avg_lat_tilt"),
    "month" = month.abb
  )
}

# Using the custom `nrel_api_json_get_df()` function to extract the solar
# resource as a dataframe
## Providing the `"api/solar/solar_resource/v1.json"` as the `endpoint`
## parameter
## Providing the `parameters_list` variable as `queries` parameter
solar_resource_df <- nrel_api_json_get_df(
  "api/solar/solar_resource/v1.json",
  parameters_list
)

# Printing the output dataframe
solar_resource_df
```

# Visualizing New York City Solar Resource Data

```{r}
# Loading the `ggplot2` and `dplyr` packages
library(ggplot2)
library(dplyr)

# Using the `ggplot()` function to plot the `avg_dni` value for each month
plot_before_factor <- ggplot(
  data = solar_resource_df,
  aes(x = month, y = avg_dni, group = 1)
) +
  geom_line() +
  geom_point() +
  theme_bw()
plot_before_factor

## Passing the plot explicitly rather than relying on the last plot displayed
ggsave("plot_avg_dni_before_factor.svg", plot = plot_before_factor)

# Converting the `month` column into factor using the following command
solar_resource_df <- solar_resource_df %>%
  mutate(month = factor(month, levels = month.abb))

# Replotting the `avg_dni` value for each month
plot_after_factor <- ggplot(
  data = solar_resource_df,
  aes(x = month, y = avg_dni, group = 1)
) +
  geom_line() +
  geom_point() +
  theme_bw()
plot_after_factor

ggsave("plot_avg_dni_after_factor.svg", plot = plot_after_factor)
```

The x-axis of the first plot is ordered alphabetically, while that of the second follows the natural order of the months, from January to December. Converting `month` to a factor with explicit levels allows ordering the labels in the plot as we wish.