Simple exercise analysing stocks using IPython

In [1]:
from pandas_datareader import data
import pandas as pd
%matplotlib inline


# Define the instruments to download. We would like to see Apple, Microsoft and the S&P500 index.
# tickers = ['W', 'HD', 'AMZN','LOW','LL']
tickers = ['ROIC', 'SKT', 'TCO' ,'SPG' ,'MAC']
# Define which online source one should use
data_source = 'yahoo'

# We would like all available data from 01/01/2000 until 12/31/2016.
start_date = '2016-12-01'
end_date = '2017-12-31'

# User pandas_reader.data.DataReader to load the desired data. As simple as that.
panel_data = data.DataReader(tickers, data_source, start_date, end_date)
In [2]:
# Getting just the adjusted closing prices. This will return a Pandas DataFrame
# The index in this DataFrame is the major index of the panel_data.
close = panel_data.ix['Close']

# Getting all weekdays between 01/01/2000 and 12/31/2016
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')

# How do we align the existing prices in adj_close with our new set of dates?
# All we need to do is reindex close using all_weekdays as the new index
close = close.reindex(all_weekdays)

close.head()
Out[2]:
MAC ROIC SKT SPG TCO
2016-12-01 67.410004 20.320000 34.299999 176.820007 71.510002
2016-12-02 68.129997 20.620001 34.650002 179.360001 72.080002
2016-12-05 68.800003 20.730000 34.970001 180.759995 72.610001
2016-12-06 69.050003 20.799999 35.270000 180.110001 73.489998
2016-12-07 70.720001 21.200001 35.900002 183.750000 74.500000
In [3]:
# Getting just the adjusted closing prices. This will return a Pandas DataFrame
# The index in this DataFrame is the major index of the panel_data.
adj_close = panel_data.ix['Adj Close']

# Getting all weekdays between 01/01/2000 and 12/31/2016
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')

# How do we align the existing prices in adj_close with our new set of dates?
# All we need to do is reindex adj_close using all_weekdays as the new index
adj_close = adj_close.reindex(all_weekdays)

# Reindexing will insert missing values (NaN) for the dates that were not present
# in the original set. To cope with this, we can fill the missing by replacing them
# with the latest available price for each instrument.
adj_close = adj_close.fillna(method='ffill')
adj_close.describe()
Out[3]:
MAC ROIC SKT SPG TCO
count 282.000000 282.000000 282.000000 282.000000 282.000000
mean 60.359130 19.702806 27.499412 163.018773 59.680361
std 4.765473 0.695494 3.795979 7.915956 7.001413
min 52.056969 17.806831 21.890348 148.815491 45.844936
25% 56.158371 19.218705 24.376531 156.912205 54.580725
50% 60.819279 19.729540 25.674796 161.862656 59.257576
75% 64.790953 20.204208 31.491758 168.811760 64.934494
max 69.344276 21.429085 34.961044 180.091019 73.055801
In [1]:
# Define your own bins
mybins = range(df.MAC.min(), df.MAC.max(), 10)

# Cut the data with the help of the bins
df['MAC_bucket'] = pd.cut(df.MAC, bins=mybins)

# Count the number of values per bucket
df['MAC_bucket'].value_counts()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-5cc82bb1cc93> in <module>()
      1 # Define your own bins
----> 2 mybins = range(df.MAC.min(), df.MAC.max(), 10)
      3 
      4 # Cut the data with the help of the bins
      5 df['MAC_bucket'] = pd.cut(df.MAC, bins=mybins)

NameError: name 'df' is not defined