4 سال پیش · a45875b02b
--- a/Mission571Solutions.Rmd
+++ b/Mission571Solutions.Rmd
@@ -0,0 +1,187 @@
 
				+---
			
 
				+title: 'Guided Project: New York Solar Resource Data'
			
 
				+author: "Dataquest"
			
 
				+date: "11/26/2020"
			
 
				+output: html_document
			
 
				+---
			
 
				+
			
 
				+# Finding the Suitable Endpoint and Parameters to Query the API
			
 
				+```{r}
			
 
				+# Storing my api key in a variable
			
 
				+the_key = "" #TODO Store your API key here
			
 
				+
			
 
				+# Identifying the API URL
			
 
				+url <- "https://developer.nrel.gov/api/solar/solar_resource/v1.json"
			
 
				+
			
 
				+# Specifying the necessary parameters to request the New York City solar data
			
 
				+parameters_list <- list(api_key = the_key, lat = 41, lon = -75)
			
 
				+```
			
 
				+
			
 
				+# Extracting the New York Solar Resource Data 
			
 
				+```{r}
			
 
				+# Loading the `httr` package
			
 
				+library(httr)
			
 
				+
			
 
				+# Using the `GET()` function to request the data from the API with `url` and `parameters_list`
			
 
				+response <- GET(url, query = parameters_list)
			
 
				+
			
 
				+# Tracking errors 
			
 
				+## Displaying the status code with the `status_code()` function
			
 
				+print(status_code(response))
			
 
				+
			
 
				+## Displaying the API response format
			
 
				+print(http_type(response))
			
 
				+
			
 
				+# Extracting the API response content as text
			
 
				+json_text <- content(response, "text")
			
 
				+
			
 
				+# Displaying this content to check how it looks visually.
			
 
				+print(json_text)
			
 
				+```
			
 
				+
			
 
				+# Parsing the JSON into R Object
			
 
				+```{r}
			
 
				+# Parsing the `json_text` to an R object using the `jsonlite::fromJSON()` function
			
 
				+json_lists <- jsonlite::fromJSON(json_text)
			
 
				+
			
 
				+# Displaying the structure of the R object using the `str()` function
			
 
				+str(json_lists)
			
 
				+```
			
 
				+
			
 
				+# How to Create a Dataframe from a Complex List
			
 
				+
			
 
				+# Building a Dataframe from a Complex List
			
 
				+```{r}
			
 
				+# Extracting the outputs data
			
 
				+outputs_list <- json_lists$outputs
			
 
				+
			
 
				+# Extracting the monthly vector (`monthly`) from the (`avg_dni`) list in the outputs data
			
 
				+avg_dni <- outputs_list$avg_dni$monthly
			
 
				+
			
 
				+# Extracting the monthly vector (`monthly`) from the (`avg_ghi`) list in the outputs data
			
 
				+avg_ghi <- outputs_list$avg_ghi$monthly
			
 
				+
			
 
				+# Extracting the monthly vector (`monthly`) from the (`avg_lat_tilt`) list in the outputs data
			
 
				+avg_lat_tilt <- outputs_list$avg_lat_tilt$monthly
			
 
				+
			
 
				+# Combining the monthly vectors into a dataframe using the `tibble::tibble()` function
			
 
				+## Adding the `month` column containing month abbreviations: `Jan`, `Feb`,...,`Dec`
			
 
				+dataframe <- tibble::tibble("avg_dni" = avg_dni, 
			
 
				+                            "avg_ghi" = avg_ghi, 
			
 
				+                            "avg_lat_tilt" = avg_lat_tilt, 
			
 
				+                            "month" = month.abb)
			
 
				+
			
 
				+# Displaying the dataframe
			
 
				+dataframe
			
 
				+```
			
 
				+
			
 
				+We can see that all the columns are still lists containing one item. For future use of this dataframe, it would probably be necessary to convert these columns to numeric.
			
 
				+
			
 
				+# Extracting a Dataframe from a Complex List: 
			
 
				+```{r}
			
 
				+# Extracting the outputs list
			
 
				+outputs_list <- json_lists$outputs
			
 
				+
			
 
				+# Simplifying the outputs list
			
 
				+simplified_outputs_list <- unlist(outputs_list)
			
 
				+
			
 
				+# Restructuring the simplified list into a matrix of 13 rows (the annual value and 12 months values)
			
 
				+data_matrix <- matrix(data = simplified_outputs_list, nrow = 13)
			
 
				+
			
 
				+# Removing the annual values from the data matrix
			
 
				+data_matrix <- data_matrix[-1, ]
			
 
				+
			
 
				+# Converting the matrix into a dataframe using the `as.data.frame()` function
			
 
				+another_dataframe <- as.data.frame(data_matrix)
			
 
				+
			
 
				+# Displaying the dataframe
			
 
				+another_dataframe
			
 
				+```
			
 
				+
			
 
				+# Putting all together
			
 
				+```{r}
			
 
				+library(httr)
			
 
				+library(dplyr)
			
 
				+the_key = "" #TODO Store your API key here 
			
 
				+
			
 
				+# Creating the custom `nrel_api_json_get_df()` function, inspired by what we did in the previous missions
			
 
				+## The function has two parameters
			
 
				+### The `endpoint` parameter is the endpoint path, appended to "https://developer.nrel.gov"
			
 
				+### The `queries` parameter is the list of API request parameters, sent as the query string
			
 
				+## The function returns a tibble with the numeric columns `avg_dni`, `avg_ghi`, and `avg_lat_tilt`,
			
 
				+## each taken from the `monthly` element of the matching output, plus a `month` column (`month.abb`)
			
 
				+## The function stops with an error when the request fails or the response is not JSON
			
 
				+nrel_api_json_get_df <- function(endpoint, queries = list()) {
			
 
				+
			
 
				+  ## Preparing the URL
			
 
				+  url <- modify_url("https://developer.nrel.gov", path = endpoint)
			
 
				+
			
 
				+  ## Querying the API
			
 
				+  response <- GET(url, query = queries)
			
 
				+
			
 
				+  ## Tracking errors
			
 
				+  ## The HTTP status goes into the error message itself, so it is still visible when the error is caught or logged
			
 
				+  if (http_error(response)) {
			
 
				+    stop("Something went wrong. ", http_status(response)$message, call. = FALSE)
			
 
				+  }
			
 
				+
			
 
				+  if (http_type(response) != "application/json") {
			
 
				+    stop("API did not return json", call. = FALSE)
			
 
				+  }
			
 
				+
			
 
				+  ## Extracting content
			
 
				+  ## Stating the encoding explicitly rather than relying on `content()` to detect it
			
 
				+  json_text <- content(response, as = "text", encoding = "UTF-8")
			
 
				+
			
 
				+  ## Converting content into Dataframe
			
 
				+  table_lst <- jsonlite::fromJSON(json_text)
			
 
				+
			
 
				+  dataframe <- tibble::tibble("avg_dni" = as.numeric(table_lst$outputs$avg_dni$monthly), 
			
 
				+                            "avg_ghi" = as.numeric(table_lst$outputs$avg_ghi$monthly), 
			
 
				+                            "avg_lat_tilt" = as.numeric(table_lst$outputs$avg_lat_tilt$monthly), 
			
 
				+                            "month" = month.abb)
			
 
				+
			
 
				+  ## Returning the dataframe
			
 
				+  dataframe
			
 
				+
			
 
				+}
			
 
				+
			
 
				+# Using the custom `nrel_api_json_get_df()` function to extract the solar resource as a dataframe
			
 
				+## Providing the `"api/solar/solar_resource/v1.json"` as the `endpoint` parameter
			
 
				+## Providing the `parameters_list` variable as `queries` parameter
			
 
				+solar_resource_df <- nrel_api_json_get_df("api/solar/solar_resource/v1.json", parameters_list)
			
 
				+
			
 
				+# Printing the output dataframe
			
 
				+solar_resource_df
			
 
				+```
			
 
				+
			
 
				+# Visualizing New York City Solar Resource Data
			
 
				+```{r}
			
 
				+# Loading the `ggplot2` and `dplyr` packages
			
 
				+library(ggplot2)
			
 
				+library(dplyr)
			
 
				+
			
 
				+# Using the `ggplot()` function to plot the `avg_dni` value for each month 
			
 
				+ggplot(data = solar_resource_df,
			
 
				+       aes(x = month, y = avg_dni, group = 1)) +
			
 
				+  geom_line() +
			
 
				+  geom_point() +
			
 
				+  theme_bw()
			
 
				+
			
 
				+ggsave("plot_avg_dni_before_factor.svg")
			
 
				+
			
 
				+# Converting the `month` column into factor using the following command  
			
 
				+solar_resource_df <- solar_resource_df %>% 
			
 
				+  mutate(month = factor(month, levels = month.abb))
			
 
				+
			
 
				+# Replotting the `avg_dni` value for each month 
			
 
				+ggplot(data = solar_resource_df,
			
 
				+       aes(x = month, y = avg_dni, group = 1)) +
			
 
				+  geom_line() +
			
 
				+  geom_point() +
			
 
				+  theme_bw()
			
 
				+
			
 
				+ggsave("plot_avg_dni_after_factor.svg")
			
 
				+```
			
 
				+
			
 
				+The first plot x-axis is ordered alphabetically, while the second is in the natural order of months, from January to December. 
			
 
				+
			
 
				+This operation allows ordering the labels in the plot as we wish.
			
 
				+
			
--- a/Mission572Solutions.Rmd
+++ b/Mission572Solutions.Rmd
@@ -0,0 +1,181 @@
 
				+---
			
 
				+title: 'Guided Project: Analyzing Movie Ratings'
			
 
				+author: "Dataquest"
			
 
				+date: "11/26/2020"
			
 
				+output: html_document
			
 
				+---
			
 
				+
			
 
				+# Loading the Web Page
			
 
				+```{r}
			
 
				+# Loading the `rvest`, `dplyr`, and `ggplot2` packages
			
 
				+library(rvest)
			
 
				+library(dplyr)
			
 
				+library(ggplot2)
			
 
				+
			
 
				+# Specifying the URL where we will extract video data
			
 
				+url <- "http://dataquestio.github.io/web-scraping-pages/Feature%20Film,%20Released%20between%202020-03-01%20and%202020-07-31%20(Sorted%20by%20Popularity%20Ascending)%20-%20IMDb.html"
			
 
				+
			
 
				+# Loading the web page content using the `read_html()` function
			
 
				+wp_content <- read_html(url)
			
 
				+```
			
 
				+
			
 
				+# String Manipulation Reminder
			
 
				+```{r}
			
 
				+# Converting "10.50" into numeric
			
 
				+as.numeric("10.50")
			
 
				+
			
 
				+# Converting the vector `c("14.59", "3.14", "55")` into numeric
			
 
				+as.numeric(c("14.59", "3.14", "55"))
			
 
				+
			
 
				+# Parsing the vector `c("14 min", "17,35", "(2012)", "1,2,3,4")` into numeric
			
 
				+readr::parse_number(c("14 min", "17,35", "(2012)", "1,2,3,4"))
			
 
				+
			
 
				+# Removing whitespaces at the beginning and end of `" Space before and after should disappear     "`
			
 
				+stringr::str_trim(" Space before and after should disappear     ")
			
 
				+```
			
 
				+
			
 
				+# Extracting Elements from the Header
			
 
				+```{r}
			
 
				+
			
 
				+# Extracting the movie's titles
			
 
				+## Finding the title CSS selector
			
 
				+title_selector <- ".lister-item-header a"
			
 
				+
			
 
				+## Identifying the number of elements this selector will select from Selector Gadget 
			
 
				+n_title <- 30
			
 
				+
			
 
				+## Extracting the movie titles combining the `html_nodes()` and `html_text()` function
			
 
				+titles <- wp_content %>% 
			
 
				+  html_nodes(title_selector) %>% 
			
 
				+  html_text()
			
 
				+
			
 
				+## Printing titles vector
			
 
				+titles
			
 
				+
			
 
				+# Extracting the movie's years
			
 
				+## Using a process similar to the one we used to extract the titles
			
 
				+year_selector <- ".lister-item-year"
			
 
				+n_year <- 30
			
 
				+years <- wp_content %>% 
			
 
				+  html_nodes(year_selector) %>% 
			
 
				+  html_text()
			
 
				+
			
 
				+## Converting the years from character to numeric data type
			
 
				+years <- readr::parse_number(years)
			
 
				+
			
 
				+## Printing years vector
			
 
				+years
			
 
				+```
			
 
				+
			
 
				+# Extracting Movie's Features
			
 
				+```{r}
			
 
				+
			
 
				+# Extracting the movie's runtimes
			
 
				+## Finding the runtime CSS selector
			
 
				+runtime_selector <- ".runtime"
			
 
				+
			
 
				+## Identifying the number of elements this selector will select from Selector Gadget 
			
 
				+n_runtime <- 30
			
 
				+
			
 
				+## Extracting the movie runtimes combining the `html_nodes()` and `html_text()` function
			
 
				+runtimes <- wp_content %>% 
			
 
				+  html_nodes(runtime_selector) %>% 
			
 
				+  html_text()
			
 
				+
			
 
				+## Converting the runtimes from character to numeric data type
			
 
				+runtimes <- readr::parse_number(runtimes)
			
 
				+
			
 
				+## Printing runtimes vector
			
 
				+runtimes
			
 
				+
			
 
				+# Extracting the movie's genres
			
 
				+## Extracting the movie genres using a similar process as previously
			
 
				+genre_selector <- ".genre"
			
 
				+n_genre <- 30
			
 
				+genres <- wp_content %>% 
			
 
				+  html_nodes(genre_selector) %>% 
			
 
				+  html_text()
			
 
				+
			
 
				+## Removing leading and trailing whitespaces from the genre strings
			
 
				+genres <- stringr::str_trim(genres)
			
 
				+
			
 
				+## Printing genres vector
			
 
				+genres
			
 
				+```
			
 
				+
			
 
				+# Extracting Movie's Ratings
			
 
				+```{r}
			
 
				+# Extracting the movie's user ratings
			
 
				+## Finding the user rating CSS selector
			
 
				+user_rating_selector <- ".ratings-imdb-rating"
			
 
				+
			
 
				+## Identifying the number of elements this selector will select from Selector Gadget 
			
 
				+n_user_rating <- 29
			
 
				+
			
 
				+## Extracting the user rating combining the `html_nodes()` and `html_attr()` function
			
 
				+user_ratings <- wp_content %>% 
			
 
				+  html_nodes(user_rating_selector) %>% 
			
 
				+  html_attr("data-value")
			
 
				+
			
 
				+## Converting the user rating from character to numeric data type
			
 
				+user_ratings <- as.numeric(user_ratings)
			
 
				+
			
 
				+## Printing user ratings vector
			
 
				+user_ratings
			
 
				+
			
 
				+# Extracting the movie's metascores
			
 
				+## Extracting the movie metascore using a similar process as previously
			
 
				+metascore_selector <- ".metascore"
			
 
				+n_metascore <- 25
			
 
				+metascores <- wp_content %>% 
			
 
				+  html_nodes(metascore_selector) %>% 
			
 
				+  html_text()
			
 
				+
			
 
				+## Removing whitespaces at the end of metascores and converting them into numeric
			
 
				+metascores <- stringr::str_trim(metascores)
			
 
				+metascores <- as.numeric(metascores)
			
 
				+
			
 
				+## Printing metascores vector
			
 
				+metascores
			
 
				+```
			
 
				+
			
 
				+# Extracting Movie's Votes
			
 
				+```{r}
			
 
				+
			
 
				+# Extracting the movie's votes
			
 
				+## Finding the vote CSS selector
			
 
				+vote_selector <- ".sort-num_votes-visible :nth-child(2)"
			
 
				+
			
 
				+## Identifying the number of elements this selector will select from Selector Gadget 
			
 
				+n_vote <- 29
			
 
				+
			
 
				+## Extracting the votes combining the `html_nodes()` and `html_text()` function
			
 
				+votes <- wp_content %>% 
			
 
				+  html_nodes(vote_selector) %>% 
			
 
				+  html_text()
			
 
				+
			
 
				+## Converting the vote from character to numeric data type
			
 
				+votes <- readr::parse_number(votes)
			
 
				+
			
 
				+## Printing votes vector
			
 
				+votes
			
 
				+```
			
 
				+
			
 
				+
			
 
				+# Putting It All Together and Visualizing
			
 
				+```{r}
			
 
				+# Creating a dataframe with the data we previously extracted: titles, years, runtimes, genres, user ratings, and votes.
			
 
				+## Removing the 17th element from the vectors: titles, years, runtimes, and genres
			
 
				+## Keeping only the integer part of the user ratings using the `floor()` function. For example, `3.4` becomes `3`.
			
 
				+movie_df <- tibble::tibble("title" = titles[-17], 
			
 
				+                           "year" = years[-17], 
			
 
				+                           "runtime" = runtimes[-17], 
			
 
				+                           "genre" = genres[-17], 
			
 
				+                           "rating" = floor(user_ratings), 
			
 
				+                           "vote" = votes)
			
 
				+
			
 
				+# Creating a boxplot that shows the number of votes against the user rating
			
 
				+ggplot(data = movie_df,
			
 
				+       aes(x = rating, y = vote, group = rating)) +
			
 
				+  geom_boxplot()
			
 
				+```