Simple exercise analysing stocks using IPython
In [1]:
from pandas_datareader import data
import pandas as pd
%matplotlib inline
# Define the instruments to download. We would like to see Apple, Microsoft and the S&P500 index.
# tickers = ['W', 'HD', 'AMZN','LOW','LL']
tickers = ['ROIC', 'SKT', 'TCO' ,'SPG' ,'MAC']
# Define which online source one should use
data_source = 'yahoo'
# We would like all available data from 01/01/2000 until 12/31/2016.
start_date = '2016-12-01'
end_date = '2017-12-31'
# User pandas_reader.data.DataReader to load the desired data. As simple as that.
panel_data = data.DataReader(tickers, data_source, start_date, end_date)
In [2]:
# Getting just the adjusted closing prices. This will return a Pandas DataFrame
# The index in this DataFrame is the major index of the panel_data.
close = panel_data.ix['Close']
# Getting all weekdays between 01/01/2000 and 12/31/2016
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')
# How do we align the existing prices in adj_close with our new set of dates?
# All we need to do is reindex close using all_weekdays as the new index
close = close.reindex(all_weekdays)
close.head()
Out[2]:
In [3]:
# Getting just the adjusted closing prices. This will return a Pandas DataFrame
# The index in this DataFrame is the major index of the panel_data.
adj_close = panel_data.ix['Adj Close']
# Getting all weekdays between 01/01/2000 and 12/31/2016
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')
# How do we align the existing prices in adj_close with our new set of dates?
# All we need to do is reindex adj_close using all_weekdays as the new index
adj_close = adj_close.reindex(all_weekdays)
# Reindexing will insert missing values (NaN) for the dates that were not present
# in the original set. To cope with this, we can fill the missing by replacing them
# with the latest available price for each instrument.
adj_close = adj_close.fillna(method='ffill')
adj_close.describe()
Out[3]:
In [1]:
# Define your own bins
mybins = range(df.MAC.min(), df.MAC.max(), 10)
# Cut the data with the help of the bins
df['MAC_bucket'] = pd.cut(df.MAC, bins=mybins)
# Count the number of values per bucket
df['MAC_bucket'].value_counts()