|
@@ -0,0 +1,104 @@
|
|
|
|
+{
|
|
|
|
+ "cells": [
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Set up libraries and look at first few rows\n",
|
|
|
|
+ "library(RSQLite)\n",
|
|
|
|
+ "library(DBI)\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "conn = dbConnect(RSQLite(), \"./factbook.db\")\n",
|
|
|
|
+ "q1 = \"SELECT * FROM facts LIMIT 5\"\n",
|
|
|
|
+ "result1 = dbGetQuery(conn, q1)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Looking at summary statistics\n",
|
|
|
|
+ "q2 = \"SELECT MIN(population), MAX(population), MIN(population_growth), MAX(population_growth) FROM facts\"\n",
|
|
|
|
+ "result2 = dbGetQuery(conn, q2)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Investigating outlier values\n",
|
|
|
|
+ "q3 = \"SELECT * FROM facts WHERE (population == MAX(population))\"\n",
|
|
|
|
+ "result3 = dbGetQuery(conn, q3)\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "q4 = \"SELECT * FROM facts WHERE (population == MIN(population))\"\n",
|
|
|
|
+ "result4 = dbGetQuery(conn, q3)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Omitting outlier values from the query\n",
|
|
|
|
+ "q5 = \"SELECT population, population_growth, birth_rate, death_rate FROM facts WHERE (population != MAX(population) AND population != MIN(population))\"\n",
|
|
|
|
+ "result5 = dbGetQuery(conn, q5)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Plotting histograms for the variables from Q5\n",
|
|
|
|
+ "library(tidyverse)\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "tidy_result5 = result5 %>%\n",
|
|
|
|
+ "gather(., key = \"variable\", value = \"val\")\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "ggplot(data = result5, aes(x = val)) +\n",
|
|
|
|
+ "geom_histogram() + \n",
|
|
|
|
+ "facet_grid(~ variable)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Calculating and sorting by population density\n",
|
|
|
|
+ "q7 = \"SELECT name, cast(population as float)/cast(area as float) density FROM facts ORDER BY density DESC\"\n",
|
|
|
|
+ "result7 = dbGetQuery(conn, q7)"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "metadata": {
|
|
|
|
+ "kernelspec": {
|
|
|
|
+ "display_name": "Python 3",
|
|
|
|
+ "language": "python",
|
|
|
|
+ "name": "python3"
|
|
|
|
+ },
|
|
|
|
+ "language_info": {
|
|
|
|
+ "codemirror_mode": {
|
|
|
|
+ "name": "ipython",
|
|
|
|
+ "version": 3
|
|
|
|
+ },
|
|
|
|
+ "file_extension": ".py",
|
|
|
|
+ "mimetype": "text/x-python",
|
|
|
|
+ "name": "python",
|
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
|
+ "version": "3.6.5"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "nbformat": 4,
|
|
|
|
+ "nbformat_minor": 2
|
|
|
|
+}
|