{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Introduction and Schema Diagram"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Connected: None@chinook.db'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%capture\n",
    "%load_ext sql\n",
    "%sql sqlite:///chinook.db"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Overview of the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <th>name</th>\n",
       "        <th>type</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>album</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>artist</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>customer</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>employee</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>genre</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>invoice</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>invoice_line</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>media_type</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>playlist</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>playlist_track</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>track</td>\n",
       "        <td>table</td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "[('album', 'table'),\n",
       " ('artist', 'table'),\n",
       " ('customer', 'table'),\n",
       " ('employee', 'table'),\n",
       " ('genre', 'table'),\n",
       " ('invoice', 'table'),\n",
       " ('invoice_line', 'table'),\n",
       " ('media_type', 'table'),\n",
       " ('playlist', 'table'),\n",
       " ('playlist_track', 'table'),\n",
       " ('track', 'table')]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%sql\n",
    "SELECT\n",
    "    name,\n",
    "    type\n",
    "FROM sqlite_master\n",
    "WHERE type IN (\"table\",\"view\");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Selecting New Albums to Purchase"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <th>genre</th>\n",
       "        <th>tracks_sold</th>\n",
       "        <th>percentage_sold</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Rock</td>\n",
       "        <td>561</td>\n",
       "        <td>0.5337773549000951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Alternative &amp; Punk</td>\n",
       "        <td>130</td>\n",
       "        <td>0.12369172216936251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Metal</td>\n",
       "        <td>124</td>\n",
       "        <td>0.11798287345385347</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>R&amp;B/Soul</td>\n",
       "        <td>53</td>\n",
       "        <td>0.05042816365366318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Blues</td>\n",
       "        <td>36</td>\n",
       "        <td>0.03425309229305423</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Alternative</td>\n",
       "        <td>35</td>\n",
       "        <td>0.03330161750713606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Latin</td>\n",
       "        <td>22</td>\n",
       "        <td>0.02093244529019981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Pop</td>\n",
       "        <td>22</td>\n",
       "        <td>0.02093244529019981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Hip Hop/Rap</td>\n",
       "        <td>20</td>\n",
       "        <td>0.019029495718363463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Jazz</td>\n",
       "        <td>14</td>\n",
       "        <td>0.013320647002854425</td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "[('Rock', 561, 0.5337773549000951),\n",
       " ('Alternative & Punk', 130, 0.12369172216936251),\n",
       " ('Metal', 124, 0.11798287345385347),\n",
       " ('R&B/Soul', 53, 0.05042816365366318),\n",
       " ('Blues', 36, 0.03425309229305423),\n",
       " ('Alternative', 35, 0.03330161750713606),\n",
       " ('Latin', 22, 0.02093244529019981),\n",
       " ('Pop', 22, 0.02093244529019981),\n",
       " ('Hip Hop/Rap', 20, 0.019029495718363463),\n",
       " ('Jazz', 14, 0.013320647002854425)]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%sql\n",
    "\n",
    "WITH usa_tracks_sold AS\n",
    "   (\n",
    "    SELECT il.* FROM invoice_line il\n",
    "    INNER JOIN invoice i on il.invoice_id = i.invoice_id\n",
    "    INNER JOIN customer c on i.customer_id = c.customer_id\n",
    "    WHERE c.country = \"USA\"\n",
    "   )\n",
    "\n",
    "SELECT\n",
    "    g.name genre,\n",
    "    count(uts.invoice_line_id) tracks_sold,\n",
    "    cast(count(uts.invoice_line_id) AS FLOAT) / (\n",
    "        SELECT COUNT(*) from usa_tracks_sold\n",
    "    ) percentage_sold\n",
    "FROM usa_tracks_sold uts\n",
    "INNER JOIN track t on t.track_id = uts.track_id\n",
    "INNER JOIN genre g on g.genre_id = t.genre_id\n",
    "GROUP BY 1\n",
    "ORDER BY 2 DESC\n",
    "LIMIT 10;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Based on the sales of tracks across different genres in the USA, we should purchase the new albums by the following artists:\n",
    "\n",
    "- Red Tone (Punk)\n",
    "- Slim Jim Bites (Blues)\n",
    "- Meteor and the Girls (Pop)\n",
    "\n",
    "It's worth keeping in mind that combined, these three genres only make up only 17% of total sales, so we should be on the lookout for artists and albums from the 'rock' genre, which accounts for 53% of sales."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analyzing Employee Sales Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <th>employee</th>\n",
       "        <th>hire_date</th>\n",
       "        <th>total_sales</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Jane Peacock</td>\n",
       "        <td>2017-04-01 00:00:00</td>\n",
       "        <td>1731.5099999999998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Margaret Park</td>\n",
       "        <td>2017-05-03 00:00:00</td>\n",
       "        <td>1584.0000000000002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Steve Johnson</td>\n",
       "        <td>2017-10-17 00:00:00</td>\n",
       "        <td>1393.92</td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "[('Jane Peacock', '2017-04-01 00:00:00', 1731.5099999999998),\n",
       " ('Margaret Park', '2017-05-03 00:00:00', 1584.0000000000002),\n",
       " ('Steve Johnson', '2017-10-17 00:00:00', 1393.92)]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%sql\n",
    "\n",
    "WITH customer_support_rep_sales AS\n",
    "    (\n",
    "     SELECT\n",
    "         i.customer_id,\n",
    "         c.support_rep_id,\n",
    "         SUM(i.total) total\n",
    "     FROM invoice i\n",
    "     INNER JOIN customer c ON i.customer_id = c.customer_id\n",
    "     GROUP BY 1,2\n",
    "    )\n",
    "\n",
    "SELECT\n",
    "    e.first_name || \" \" || e.last_name employee,\n",
    "    e.hire_date,\n",
    "    SUM(csrs.total) total_sales\n",
    "FROM customer_support_rep_sales csrs\n",
    "INNER JOIN employee e ON e.employee_id = csrs.support_rep_id\n",
    "GROUP BY 1;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "While there is a 20% difference in sales between Jane (the top employee) and Steve (the bottom employee), the difference roughly corresponds with the differences in their hiring dates."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analyzing Sales by Country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <th>country</th>\n",
       "        <th>customers</th>\n",
       "        <th>total_sales</th>\n",
       "        <th>average_order</th>\n",
       "        <th>customer_lifetime_value</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>USA</td>\n",
       "        <td>13</td>\n",
       "        <td>1040.490000000008</td>\n",
       "        <td>7.942671755725252</td>\n",
       "        <td>80.03769230769292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Canada</td>\n",
       "        <td>8</td>\n",
       "        <td>535.5900000000034</td>\n",
       "        <td>7.047236842105309</td>\n",
       "        <td>66.94875000000043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Brazil</td>\n",
       "        <td>5</td>\n",
       "        <td>427.68000000000245</td>\n",
       "        <td>7.011147540983647</td>\n",
       "        <td>85.53600000000048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>France</td>\n",
       "        <td>5</td>\n",
       "        <td>389.0700000000021</td>\n",
       "        <td>7.781400000000042</td>\n",
       "        <td>77.81400000000042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Germany</td>\n",
       "        <td>4</td>\n",
       "        <td>334.6200000000016</td>\n",
       "        <td>8.161463414634186</td>\n",
       "        <td>83.6550000000004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Czech Republic</td>\n",
       "        <td>2</td>\n",
       "        <td>273.24000000000103</td>\n",
       "        <td>9.108000000000034</td>\n",
       "        <td>136.62000000000052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>United Kingdom</td>\n",
       "        <td>3</td>\n",
       "        <td>245.52000000000078</td>\n",
       "        <td>8.768571428571457</td>\n",
       "        <td>81.84000000000026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Portugal</td>\n",
       "        <td>2</td>\n",
       "        <td>185.13000000000022</td>\n",
       "        <td>6.3837931034482835</td>\n",
       "        <td>92.56500000000011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>India</td>\n",
       "        <td>2</td>\n",
       "        <td>183.1500000000002</td>\n",
       "        <td>8.72142857142858</td>\n",
       "        <td>91.5750000000001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>Other</td>\n",
       "        <td>15</td>\n",
       "        <td>1094.9400000000085</td>\n",
       "        <td>7.448571428571486</td>\n",
       "        <td>72.99600000000056</td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "[('USA', 13, 1040.490000000008, 7.942671755725252, 80.03769230769292),\n",
       " ('Canada', 8, 535.5900000000034, 7.047236842105309, 66.94875000000043),\n",
       " ('Brazil', 5, 427.68000000000245, 7.011147540983647, 85.53600000000048),\n",
       " ('France', 5, 389.0700000000021, 7.781400000000042, 77.81400000000042),\n",
       " ('Germany', 4, 334.6200000000016, 8.161463414634186, 83.6550000000004),\n",
       " ('Czech Republic', 2, 273.24000000000103, 9.108000000000034, 136.62000000000052),\n",
       " ('United Kingdom', 3, 245.52000000000078, 8.768571428571457, 81.84000000000026),\n",
       " ('Portugal', 2, 185.13000000000022, 6.3837931034482835, 92.56500000000011),\n",
       " ('India', 2, 183.1500000000002, 8.72142857142858, 91.5750000000001),\n",
       " ('Other', 15, 1094.9400000000085, 7.448571428571486, 72.99600000000056)]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%sql\n",
    "\n",
    "WITH country_or_other AS\n",
    "    (\n",
    "     SELECT\n",
    "       CASE\n",
    "           WHEN (\n",
    "                 SELECT count(*)\n",
    "                 FROM customer\n",
    "                 where country = c.country\n",
    "                ) = 1 THEN \"Other\"\n",
    "           ELSE c.country\n",
    "       END AS country,\n",
    "       c.customer_id,\n",
    "       il.*\n",
    "     FROM invoice_line il\n",
    "     INNER JOIN invoice i ON i.invoice_id = il.invoice_id\n",
    "     INNER JOIN customer c ON c.customer_id = i.customer_id\n",
    "    )\n",
    "\n",
    "SELECT\n",
    "    country,\n",
    "    customers,\n",
    "    total_sales,\n",
    "    average_order,\n",
    "    customer_lifetime_value\n",
    "FROM\n",
    "    (\n",
    "    SELECT\n",
    "        country,\n",
    "        count(distinct customer_id) customers,\n",
    "        SUM(unit_price) total_sales,\n",
    "        SUM(unit_price) / count(distinct customer_id) customer_lifetime_value,\n",
    "        SUM(unit_price) / count(distinct invoice_id) average_order,\n",
    "        CASE\n",
    "            WHEN country = \"Other\" THEN 1\n",
    "            ELSE 0\n",
    "        END AS sort\n",
    "    FROM country_or_other\n",
    "    GROUP BY country\n",
    "    ORDER BY sort ASC, total_sales DESC\n",
    "    );"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Based on the data, there may be opportunity in the following countries:\n",
    "\n",
    "- Czech Republic\n",
    "- United Kingdom\n",
    "- India\n",
    "\n",
    "It's worth keeping in mind that because the amount of data from each of these countries is relatively low.  Because of this, we should be cautious spending too much money on new marketing campaigns, as the sample size is not large enough to give us high confidence.  A better approach would be to run small campaigns in these countries, collecting and analyzing the new customers to make sure that these trends hold with new customers."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Albums vs Individual Tracks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "    <tr>\n",
       "        <th>album_purchase</th>\n",
       "        <th>number_of_invoices</th>\n",
       "        <th>percent</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>no</td>\n",
       "        <td>500</td>\n",
       "        <td>0.8143322475570033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "        <td>yes</td>\n",
       "        <td>114</td>\n",
       "        <td>0.18566775244299674</td>\n",
       "    </tr>\n",
       "</table>"
      ],
      "text/plain": [
       "[('no', 500, 0.8143322475570033), ('yes', 114, 0.18566775244299674)]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%sql\n",
    "\n",
    "WITH invoice_first_track AS\n",
    "    (\n",
    "     SELECT\n",
    "         il.invoice_id invoice_id,\n",
    "         MIN(il.track_id) first_track_id\n",
    "     FROM invoice_line il\n",
    "     GROUP BY 1\n",
    "    )\n",
    "\n",
    "SELECT\n",
    "    album_purchase,\n",
    "    COUNT(invoice_id) number_of_invoices,\n",
    "    CAST(count(invoice_id) AS FLOAT) / (\n",
    "                                         SELECT COUNT(*) FROM invoice\n",
    "                                      ) percent\n",
    "FROM\n",
    "    (\n",
    "    SELECT\n",
    "        ifs.*,\n",
    "        CASE\n",
    "            WHEN\n",
    "                 (\n",
    "                  SELECT t.track_id FROM track t\n",
    "                  WHERE t.album_id = (\n",
    "                                      SELECT t2.album_id FROM track t2\n",
    "                                      WHERE t2.track_id = ifs.first_track_id\n",
    "                                     ) \n",
    "\n",
    "                  EXCEPT \n",
    "\n",
    "                  SELECT il2.track_id FROM invoice_line il2\n",
    "                  WHERE il2.invoice_id = ifs.invoice_id\n",
    "                 ) IS NULL\n",
    "             AND\n",
    "                 (\n",
    "                  SELECT il2.track_id FROM invoice_line il2\n",
    "                  WHERE il2.invoice_id = ifs.invoice_id\n",
    "\n",
    "                  EXCEPT \n",
    "\n",
    "                  SELECT t.track_id FROM track t\n",
    "                  WHERE t.album_id = (\n",
    "                                      SELECT t2.album_id FROM track t2\n",
    "                                      WHERE t2.track_id = ifs.first_track_id\n",
    "                                     ) \n",
    "                 ) IS NULL\n",
    "             THEN \"yes\"\n",
    "             ELSE \"no\"\n",
    "         END AS \"album_purchase\"\n",
    "     FROM invoice_first_track ifs\n",
    "    )\n",
    "GROUP BY album_purchase;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Album purchases account for 18.6% of purchases.  Based on this data, I would recommend against purchasing only select tracks from albums from record companies, since there is potential to lose one fifth of revenue."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}