# Stock Price Data

In [45]:
import concurrent.futures
import os

def read_file(filename):
    """Load one price file and split it into rows of string fields.

    Args:
        filename: Path of the file to read, e.g. "prices/aapl.csv".

    Returns:
        A ``(key, rows)`` tuple. ``key`` is ``filename`` with every ".csv"
        and "prices/" substring removed. ``rows`` holds one entry per line
        of the stripped file content, each entry being the list of that
        line's comma-separated fields (all still strings).
    """
    with open(filename, 'r') as handle:
        text = handle.read().strip()
    symbol = filename.replace(".csv", "").replace("prices/", "")
    rows = [line.split(",") for line in text.split("\n")]
    return symbol, rows

results = []  # not referenced by any cell shown in this notebook; kept as-is

# Every entry of the prices/ directory is read by a pool of two worker
# processes. Using the executor as a context manager shuts the pool down as
# soon as the results are collected, so the worker processes are not left
# running for the remainder of the kernel session.
filenames = ["prices/{}".format(f) for f in os.listdir("prices")]
with concurrent.futures.ProcessPoolExecutor(max_workers=2) as pool:
    # read_file returns (key, rows) pairs; dict() turns them into a
    # key -> rows mapping. The map iterator is consumed inside the `with`
    # block, before the pool is shut down.
    prices = dict(pool.map(read_file, filenames))

I chose to store the data as a hash table (dictionary) keyed by stock symbol, whose values are lists of rows, with each row itself being a list of field values. This closely mirrors the existing structure of the data: it allows stock symbols to be looked up easily, and lets me index into rows and fields efficiently.

# Computing Aggregates

In [46]:
from dateutil.parser import parse

prices_columns = {}

# Pivot each symbol's row-oriented table into a column-oriented one:
# header name -> list of converted values.
for symbol, rows in prices.items():
    header_row, body_rows = rows[0], rows[1:]
    columns = {}
    for col_index, name in enumerate(header_row):
        raw_values = [row[col_index] for row in body_rows]
        # The first column goes through dateutil's parse(); every other
        # column is converted to float.
        converter = parse if col_index == 0 else float
        columns[name] = [converter(raw) for raw in raw_values]
    prices_columns[symbol] = columns

In [47]:
from statistics import mean

# Mean of the "close" column for every symbol.
average_closing = {
    symbol: mean(columns["close"])
    for symbol, columns in prices_columns.items()
}

In [48]:
# (symbol, average close) pairs, displayed in ascending order of the average.
closing_tuples = list(average_closing.items())
sorted(closing_tuples, key=lambda pair: pair[1])

[('blfs', 0.8122763011583011),
 ('apdn', 0.8241009938223938),
 ('bmra', 0.901011583011583),
 ('bcli', 0.9969415324324323),
 ('cyrx', 1.1615408884169884),
 ('clrb', 1.2045711436293436),
 ('cpst', 1.206953667953668),
 ('csbr', 1.2282443845854418),
 ('egt', 1.3293513513513513),
 ('aemd', 1.398042471042471),
 ('dfbg', 1.4005010393822395),
 ('alqa', 1.405298283011583),
 ('cpah', 1.4116189448441248),
 ('astc', 1.4152123552123552),
 ('chci', 1.4581224154440156),
 ('ctic', 1.494366311969112),
 ('eltk', 1.5323436293436294),
 ('dzsi', 1.5382316602316601),
 ('cool', 1.5475988922779924),
 ('cgnt', 1.5946138996138997),
 ('creg', 1.6028996138996139),
 ('casi', 1.617906349034749),
 ('admp', 1.7122164397683397),
 ('bnso', 1.7172548262548262),
 ('aezs', 1.7391445949806952),
 ('dynt', 1.822119691119691),
 ('apps', 1.8256061776061776),
 ('dysl', 1.8631660231660232),
 ('apri', 1.8681738996138995),
 ('crds', 1.8903166015444017),
 ('dlhc', 1.8903745173745172),
 ('cur', 1.907691699604743),
 ('ardm', 1.928069

It appears that `AMZN` and `AAPL` have the highest average closing prices, while `BLFS` and `APDN` have the lowest.

# Finding The Most Traded Stock Each Day

In [49]:
# Group every symbol's volume by date: date -> [[symbol, volume], ...].
trades = {}
for symbol, columns in prices_columns.items():
    for day, volume in zip(columns["date"], columns["volume"]):
        trades.setdefault(day, []).append([symbol, volume])

In [50]:
# For each date keep the symbol with the largest volume, then order by date.
most_traded = []
for day, entries in trades.items():
    # Stable ascending sort; the last entry is the highest-volume one.
    ranked = sorted(entries, key=lambda entry: entry[1])
    most_traded.append([day, ranked[-1][0]])
most_traded.sort(key=lambda row: row[0])

most_traded

[[datetime.datetime(2007, 1, 3, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 4, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 5, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 8, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 9, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 10, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 11, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 12, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 16, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 17, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 18, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 19, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 22, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 23, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 24, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 25, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 26, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 29, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 30, 0, 0), 'aapl'],
 [datetime.datetime(2007, 1, 31, 0, 0), 'aapl'],
 [datetime.datetime(2007,

# Searching For High Volume Days

In [51]:
# Total volume traded across all symbols on each date.
# The original cell also reassigned `most_traded = []` here, which discarded
# the per-day list computed in the preceding section without using it; that
# stray assignment has been removed so `most_traded` keeps its result.
daily_volumes = {
    day: sum(entry[1] for entry in entries)
    for day, entries in trades.items()
}

In [52]:
# [date, total volume] pairs ordered by ascending volume, so the final ten
# entries are the highest-volume days.
volume_tuples = [[day, total] for day, total in daily_volumes.items()]
volume_tuples.sort(key=lambda pair: pair[1])

volume_tuples[-10:]

[[datetime.datetime(2008, 1, 24, 0, 0), 1533363200.0],
 [datetime.datetime(2008, 1, 16, 0, 0), 1536176400.0],
 [datetime.datetime(2007, 11, 8, 0, 0), 1553880500.0],
 [datetime.datetime(2008, 9, 29, 0, 0), 1555072400.0],
 [datetime.datetime(2008, 2, 7, 0, 0), 1559032100.0],
 [datetime.datetime(2008, 1, 22, 0, 0), 1578877700.0],
 [datetime.datetime(2008, 10, 8, 0, 0), 1599183500.0],
 [datetime.datetime(2007, 7, 26, 0, 0), 1611272800.0],
 [datetime.datetime(2008, 10, 10, 0, 0), 1770266900.0],
 [datetime.datetime(2008, 1, 23, 0, 0), 1964583900.0]]

In [53]:
import math

# First element (the date) of each of the last ten volume_tuples entries.
high_volume_days = [entry[0] for entry in volume_tuples[-10:]]

def binary_search(array, search):
    """Locate `search` in `array` using binary search.

    Args:
        array: Sequence sorted in ascending order whose elements can be
            compared with `search`.
        search: Value to look for.

    Returns:
        An index ``m`` such that ``array[m] == search``, or ``None`` when no
        element matches (including when `array` is empty).
    """
    low = 0
    high = len(array) - 1
    while low <= high:
        # Integer midpoint; no float division / math.floor round-trip needed.
        mid = low + (high - low) // 2
        if array[mid] == search:
            return mid
        if array[mid] < search:
            low = mid + 1
        else:
            # Plain `else` instead of a third `>` test: the window shrinks on
            # every iteration, so the loop terminates even for elements that
            # are neither ==, < nor > the target (e.g. float NaN).
            high = mid - 1
    return None

# For every symbol, collect its raw price rows for the high-volume days on
# which that symbol has an entry.
high_volume_transactions = {}
for symbol, columns in prices_columns.items():
    for day in high_volume_days:
        # NOTE(review): binary_search needs columns["date"] to be in ascending
        # order — confirm the source files are sorted by date.
        ind = binary_search(columns["date"], day)
        if ind is None:
            continue
        # columns["date"] was built from the rows *after* the header row, so
        # position `ind` there corresponds to row `ind + 1` of prices[symbol].
        # Indexing with plain `ind` returned the preceding row (or the header
        # row when ind == 0).
        high_volume_transactions.setdefault(symbol, []).append(prices[symbol][ind + 1])

# Finding Profitable Stocks

In [54]:
# Percentage change from the first to the last "close" value of each symbol,
# ordered ascending so the final ten entries are the largest gains.
profits = []
for symbol, columns in prices_columns.items():
    closes = columns["close"]
    last_close, first_close = closes[-1], closes[0]
    profits.append([symbol, (last_close - first_close) / first_close * 100])
profits.sort(key=lambda row: row[1])

profits[-10:]

[['achc', 1330.0000666666667],
 ['bcli', 1339.2137535980346],
 ['cui', 1525.1625162516252],
 ['apdn', 1549.6700659868025],
 ['anip', 1707.3554472785033],
 ['amzn', 2230.7234281466817],
 ['blfs', 2437.4365640858978],
 ['arcw', 3898.60048982856],
 ['adxs', 4005.0000000000005],
 ['admp', 7483.8389225948395]]

The most profitable stock to buy in `2007` would have been `ADMP`, which appreciated from around `7` cents to its current price of `4.43`.