# Stock Price Data

In [34]:
import os
import pandas as pd

stock_prices = {}

for fn in os.listdir("prices"):
    # Get the name of the file without extension "aapl.csv" -> "aapl"
    name = fn.split(".")[0]
    stock_prices[name] = pd.read_csv(os.path.join("prices", fn))

We chose a dictionary where the keys are the stock symbols and the values are DataFrames from the corresponding CSV file.

Let's display the data stored for the `aapl` stock symbol:

In [35]:
stock_prices["aapl"].head()

Unnamed: 0,date,close,open,high,low,volume
0,2007-01-03,83.800002,86.289999,86.579999,81.899999,309579900
1,2007-01-04,85.659998,84.050001,85.949998,83.820003,211815100
2,2007-01-05,85.049997,85.77,86.199997,84.400002,208685400
3,2007-01-08,85.47,85.959998,86.529998,85.280003,199276700
4,2007-01-09,92.570003,86.450003,92.979999,85.15,837324600


# Computing Aggregates

## Computing average closing prices 

In [36]:
avg_closing_prices = {}

for stock_sym in stock_prices:
    avg_closing_prices[stock_sym] = stock_prices[stock_sym]["close"].mean()

## Displaying the average closing prices

In [37]:
for stock_sym in stock_prices:
    print(stock_sym, avg_closing_prices[stock_sym])

eqix 165.3847721150579
club 7.270509651737427
bmrc 39.35481079459455
cald 8.608965250965264
cybe 9.964861003860992
bbry 43.67659082355207
chscp 29.07304635598456
essa 12.126070440047481
cprx 1.976200772200771
arrs 17.10461388532818
ctic 1.4943663119691135
adrd 22.51748262046331
arna 4.915745173745166
ffic 16.593648647876414
ca 25.746281860231644
alot 10.28669884208494
csfl 11.947644780694985
cern 65.04237453166031
fhco 4.28845945945947
dvax 6.0337528984555995
exel 6.616277998455593
abcb 17.990475994208477
alog 64.74335521467185
bncn 13.986131252895746
eltk 1.5323436293436348
fbiz 22.95887644826253
brks 10.52473359227799
cunb 15.99822393513515
clrb 1.204571143629345
agen 2.9998899559845587
amzn 275.1340775710431
eqfn 5.558436266023189
evep 31.358648642471
bnso 1.717254826254819
asys 8.914054046332067
fisi 19.938084950965262
cbio 8.433602686100393
flic 27.73225096177597
bmrn 50.521710407335874
bcbp 11.546521235135131
aezs 1.739144594980703
cmls 3.678938223938218
apwc 3.2336409266409234
c

## Minimum and maximum closing prices

In [38]:
pairs = [(avg_closing_prices[stock_sym], stock_sym) for stock_sym in stock_prices]

pairs.sort()

print("Two minimum average closing prices:")
print(pairs[0])
print(pairs[1])

print()

print("Two maximum average closing prices:")
print(pairs[-1])
print(pairs[-2])

Two minimum average closing prices:
(0.8122763011583004, 'blfs')
(0.824100993822394, 'apdn')

Two maximum average closing prices:
(275.1340775710431, 'amzn')
(257.17654040231656, 'aapl')


It appears the `amzn` and `aapl` have the highest average closing prices, while `blfs` and `apdn` have the lowest average closing prices.

# Organizing the Trades Per Day

We are going to calculate a dictionary where the keys are the days and the values are lists of pairs `(volume, stock_symbol)` of all trades that occurred on that day.

In [None]:
trades_by_day = {}

for stock_sym in stock_prices:
    for index, row in stock_prices[stock_sym].iterrows():
        day = row["date"]
        volume = row["volume"]
        pair = (volume, stock_sym)
        if day not in trades_by_day:
            trades_by_day[day] = []
        trades_by_day[day].append(pair)

# Finding the Most Traded Stock Each Day

Calculate a dictionary where the keys are the days and the value of each day is a pair `(volume, stock_symbol)` with the most traded stock symbol on that day.

In [42]:
most_traded_by_day = {}

for day in trades_by_day:
    trades_by_day[day].sort()
    most_traded_by_day[day] = trades_by_day[day][-1]

## Verify a Few of the Results

In [44]:
print(most_traded_by_day['2007-01-03'])
print(most_traded_by_day['2007-01-04'])
print(most_traded_by_day['2007-01-05'])
print(most_traded_by_day['2007-01-08'])

(309579900, 'aapl')
(211815100, 'aapl')
(208685400, 'aapl')
(199276700, 'aapl')


# Searching for High Volume Days

In [47]:
daily_volumes = []

for day in trades_by_day:
    day_volume = sum([volume for volume, _ in trades_by_day[day]])
    daily_volumes.append((day_volume, day))

daily_volumes.sort()

daily_volumes[-10:]

[(1533363200, '2008-01-24'),
 (1536176400, '2008-01-16'),
 (1553880500, '2007-11-08'),
 (1555072400, '2008-09-29'),
 (1559032100, '2008-02-07'),
 (1578877700, '2008-01-22'),
 (1599183500, '2008-10-08'),
 (1611272800, '2007-07-26'),
 (1770266900, '2008-10-10'),
 (1964583900, '2008-01-23')]

# Finding Profitable Stocks

In [54]:
percentages = []

for stock_sym in stock_prices:
    prices = stock_prices[stock_sym]
    initial = prices.loc[0, "close"]
    final = prices.loc[prices.shape[0] - 1, "close"]
    percentage = 100 * (final - initial) / initial
    percentages.append((percentage, stock_sym))

percentages.sort()

percentages[-10:]

[(1330.0000666666667, 'achc'),
 (1339.2137535980346, 'bcli'),
 (1525.162516251625, 'cui'),
 (1549.6700659868027, 'apdn'),
 (1707.3554472785036, 'anip'),
 (2230.7234281466817, 'amzn'),
 (2437.4365640858978, 'blfs'),
 (3898.6004898285596, 'arcw'),
 (4005.0000000000005, 'adxs'),
 (7483.8389225948395, 'admp')]

The most profitable stock to buy in `2007` would have been `ADMP`, which appreciated from around `7` cents to its current price of `4.43`.