|
@@ -0,0 +1,268 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Python Intermediate: Creating a Baby Pandas Class Solutions"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Introduction"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "In this project, we'll be building a mini-version of the Pandas class. The goal of this project, is to create a class that can perform basic Pandas functions. The questions we'll be answering in this project are: \n",
|
|
|
+ " - Which song had the highest number of plays in one day?\n",
|
|
|
+ " - Which song had the lowest number of plays in one day?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Specifications/Requirements\n",
|
|
|
+ "\n",
|
|
|
+ "BabyPandas should make it easy for us to load , preview, manipulate, and make calculations with our data. \n",
|
|
|
+ "\n",
|
|
|
+ "To preview our data, we’ll need to:\n",
|
|
|
+ "- Be able to view the first five rows\n",
|
|
|
+ "- Be able to view the shape of our data\n",
|
|
|
+ "- Be able to view the data types for each column\n",
|
|
|
+ "\n",
|
|
|
+ "To manipulate our data, we’ll need to: \n",
|
|
|
+ "- Add new columns\n",
|
|
|
+ "- Be able to apply values to columns\n",
|
|
|
+ "- Be able to subset our data\n",
|
|
|
+ "- Change the data type\n",
|
|
|
+ "\n",
|
|
|
+ "To make calculations, we’ll need to:\n",
|
|
|
+ "- Finding the minimum\n",
|
|
|
+ "- Finding the maximum\n",
|
|
|
+ "- Finding the mean\n",
|
|
|
+ "- Finding the standard deviation"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Translating our words into objects\n",
|
|
|
+ "\n",
|
|
|
+ "BabyPandas -> Class\n",
|
|
|
+ "\n",
|
|
|
+ "Load -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "Data -> Attribute\n",
|
|
|
+ "\n",
|
|
|
+ "Columns -> Attribute\n",
|
|
|
+ "\n",
|
|
|
+ "## Preview\n",
|
|
|
+ "\n",
|
|
|
+ "View the first five rows -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "View num of rows/cols of our data -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "View the data types for each column -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "## Manipulate\n",
|
|
|
+ "\n",
|
|
|
+ "Add new columns -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "Apply values to columns -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "subset our data -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "Change the data type -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "## Calculations\n",
|
|
|
+ "\n",
|
|
|
+ "Minimum -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "Maximum -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "Mean -> Method\n",
|
|
|
+ "\n",
|
|
|
+ "Standard deviation -> Method"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 31,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "2\n",
|
|
|
+ "['Despacito (Featuring Daddy Yankee)', 'Luis Fonsi', 64238]\n",
|
|
|
+ "['Por Fin Te Encontré', 'Cali Y El Dandee', 1993]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "import csv\n",
|
|
|
+ "from statistics import mean, stdev, median, mode\n",
|
|
|
+ "\n",
|
|
|
+ "class BabyPandas():\n",
|
|
|
+ " def __init__(self, filename):\n",
|
|
|
+ " self.filename = filename\n",
|
|
|
+ " \n",
|
|
|
+ " def read_data(self):\n",
|
|
|
+ " '''\n",
|
|
|
+ " Reads and opens the data\n",
|
|
|
+ " '''\n",
|
|
|
+ " f = open(self.filename,\"r\")\n",
|
|
|
+ " self.data = list(csv.reader(f))\n",
|
|
|
+ " self.columns = self.data[0]\n",
|
|
|
+ " \n",
|
|
|
+ " def head(self):\n",
|
|
|
+ " '''\n",
|
|
|
+ " Displays the first five rows\n",
|
|
|
+ " '''\n",
|
|
|
+ " return self.data[:5]\n",
|
|
|
+ " \n",
|
|
|
+ " def info(self):\n",
|
|
|
+ " types = []\n",
|
|
|
+ " for value in self.data[1]:\n",
|
|
|
+ " types.append(type(value))\n",
|
|
|
+ " return types\n",
|
|
|
+ " \n",
|
|
|
+ " \n",
|
|
|
+ " def shape(self):\n",
|
|
|
+ " num_rows = 0\n",
|
|
|
+ " for row in self.data:\n",
|
|
|
+ " num_rows += 1\n",
|
|
|
+ " \n",
|
|
|
+ " num_cols = len(self.data[0])\n",
|
|
|
+ " return [num_rows, num_cols]\n",
|
|
|
+ " \n",
|
|
|
+ " def new_column(self, column_name):\n",
|
|
|
+ " for pos, d in enumerate(self.data):\n",
|
|
|
+ " if pos == 0:\n",
|
|
|
+ " d.append(column_name)\n",
|
|
|
+ " else:\n",
|
|
|
+ " d.append('NA')\n",
|
|
|
+ " \n",
|
|
|
+ " def apply(self, column_name, new_value):\n",
|
|
|
+ " for pos, col in enumerate(self.data[0]):\n",
|
|
|
+ " if col == column_name:\n",
|
|
|
+ " column_index = pos\n",
|
|
|
+ " \n",
|
|
|
+ " for data in self.data[1:]:\n",
|
|
|
+ " data[column_index] = new_value\n",
|
|
|
+ " \n",
|
|
|
+ " def change_type(self, column_name, function):\n",
|
|
|
+ " for pos, col in enumerate(self.data[0]):\n",
|
|
|
+ " if col == column_name:\n",
|
|
|
+ " column_index = pos\n",
|
|
|
+ " \n",
|
|
|
+ " for data in self.data[1:]:\n",
|
|
|
+ " data[column_index] = function(data[column_index])\n",
|
|
|
+ " \n",
|
|
|
+ " def subset(self, column_name, row_value):\n",
|
|
|
+ " for pos, col in enumerate(self.data[0]):\n",
|
|
|
+ " if col == column_name:\n",
|
|
|
+ " column_index = pos\n",
|
|
|
+ " \n",
|
|
|
+ " print(column_index)\n",
|
|
|
+ " subset_data = []\n",
|
|
|
+ " for data in self.data[1:]:\n",
|
|
|
+ " if row_value in data:\n",
|
|
|
+ " subset_data.append(data[column_index])\n",
|
|
|
+ " return subset_data\n",
|
|
|
+ "\n",
|
|
|
+ " \n",
|
|
|
+ " def summary_stats(self, column_name):\n",
|
|
|
+ " for pos, col in enumerate(self.data[0]):\n",
|
|
|
+ " if col == column_name:\n",
|
|
|
+ " column_index = pos\n",
|
|
|
+ "\n",
|
|
|
+ " num_data = [data[column_index] for data in self.data[1:]]\n",
|
|
|
+ " m = statistics.mean(num_data)\n",
|
|
|
+ " std = stdev(num_data)\n",
|
|
|
+ " median = statistics.median(num_data)\n",
|
|
|
+ " \n",
|
|
|
+ " print(\"Mean is {mean}\".format(mean= m))\n",
|
|
|
+ " print(\"Standard Deviation is {std}\".format(std= std))\n",
|
|
|
+ " print(\"Median is {median}\".format(median= median))\n",
|
|
|
+ " \n",
|
|
|
+ " \n",
|
|
|
+ " def minimum(self, column):\n",
|
|
|
+ " for pos, col in enumerate(self.data[0]):\n",
|
|
|
+ " if col == column:\n",
|
|
|
+ " column_index = pos\n",
|
|
|
+ "\n",
|
|
|
+ " ## Find min value\n",
|
|
|
+ " col_data = []\n",
|
|
|
+ " for row in self.data[1:]:\n",
|
|
|
+ " col_data.append([row[1],row[2],row[column_index]])\n",
|
|
|
+ " \n",
|
|
|
+ " return min(col_data, key= lambda x: x[2])\n",
|
|
|
+ " \n",
|
|
|
+ " def maximum(self, column):\n",
|
|
|
+ " for pos, col in enumerate(self.data[0]):\n",
|
|
|
+ " if col == column:\n",
|
|
|
+ " column_index = pos\n",
|
|
|
+ " ## Find min value\n",
|
|
|
+ " col_data = []\n",
|
|
|
+ " for row in self.data[1:]:\n",
|
|
|
+ " col_data.append([row[1],row[2],row[column_index]])\n",
|
|
|
+ " return max(col_data, key= lambda x: x[2])\n",
|
|
|
+ " \n",
|
|
|
+ "s = BabyPandas(\"music_data.csv\")\n",
|
|
|
+ "s.read_data()\n",
|
|
|
+ "\n",
|
|
|
+ "s.info()\n",
|
|
|
+ "s.shape()\n",
|
|
|
+ "s.columns\n",
|
|
|
+ "s.new_column('hello')\n",
|
|
|
+ "s.change_type('Streams',int)\n",
|
|
|
+ "s.subset(\"Artist\",\"Shakira\")\n",
|
|
|
+ "print(s.maximum(\"Streams\"))\n",
|
|
|
+ "print(s.minimum(\"Streams\"))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Results\n",
|
|
|
+ "\n",
|
|
|
+ "The song that had the highest number of streams in one day was Despacito by Luis Fonsi with 64238 streams. \n",
|
|
|
+ "\n",
|
|
|
+ "The song that had the lowest number of streams in one day was Por Fin Te Encontre by Cali Y El Dandee with 1993. \n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.6.3"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|