Queer European MD passionate about IT

Mission177Solutions.ipynb 30 KB

# Set up libraries and look at first few rows
library(RSQLite)
library(DBI)

conn = dbConnect(SQLite(), "./factbook.db")
q1 = "SELECT * FROM facts LIMIT 5"
result1 = dbGetQuery(conn, q1)
# Looking at summary statistics
q2 = "SELECT MIN(population), MAX(population), MIN(population_growth), MAX(population_growth) FROM facts"
result2 = dbGetQuery(conn, q2)
# Investigating outlier values
q3 = "SELECT * FROM facts WHERE (population == (SELECT MAX(population) FROM facts))"
result3 = dbGetQuery(conn, q3)

q4 = "SELECT * FROM facts WHERE (population == (SELECT MIN(population) FROM facts))"
result4 = dbGetQuery(conn, q4)
# Omitting outlier values from the query
q5 = "SELECT population, population_growth, birth_rate, death_rate FROM facts WHERE ((population != (SELECT MAX(population) FROM facts)) AND (population != (SELECT MIN(population) FROM facts)))"
result5 = dbGetQuery(conn, q5)
# Plotting histograms for the variables from Q5
library(tidyverse)

tidy_result5 = result5 %>%
gather(., key = "variable", value = "val")

ggplot(data = result5, aes(x = val)) +
geom_histogram() + 
facet_grid(~ variable)
# Calculating and sorting by population density
q7 = "SELECT name, cast(population as float)/cast(area as float) density FROM facts ORDER BY density DESC"
result7 = dbGetQuery(conn, q7)