In [1]:

from pandas_datareader import data
import pandas as pd
%matplotlib inline


# Define the instruments to download. We would like to see Apple, Microsoft and the S&P500 index.
# tickers = ['W', 'HD', 'AMZN','LOW','LL']
tickers = ['ROIC', 'SKT', 'TCO' ,'SPG' ,'MAC']
# Define which online source one should use
data_source = 'yahoo'

# We would like all available data from 01/01/2000 until 12/31/2016.
start_date = '2016-12-01'
end_date = '2017-12-31'

# User pandas_reader.data.DataReader to load the desired data. As simple as that.
panel_data = data.DataReader(tickers, data_source, start_date, end_date)

In [2]:

# Getting just the adjusted closing prices. This will return a Pandas DataFrame
# The index in this DataFrame is the major index of the panel_data.
close = panel_data.ix['Close']

# Getting all weekdays between 01/01/2000 and 12/31/2016
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')

# How do we align the existing prices in adj_close with our new set of dates?
# All we need to do is reindex close using all_weekdays as the new index
close = close.reindex(all_weekdays)

close.head()

Out[2]:

	MAC	ROIC	SKT	SPG	TCO
2016-12-01	67.410004	20.320000	34.299999	176.820007	71.510002
2016-12-02	68.129997	20.620001	34.650002	179.360001	72.080002
2016-12-05	68.800003	20.730000	34.970001	180.759995	72.610001
2016-12-06	69.050003	20.799999	35.270000	180.110001	73.489998
2016-12-07	70.720001	21.200001	35.900002	183.750000	74.500000

In [3]:

# Getting just the adjusted closing prices. This will return a Pandas DataFrame
# The index in this DataFrame is the major index of the panel_data.
adj_close = panel_data.ix['Adj Close']

# Getting all weekdays between 01/01/2000 and 12/31/2016
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')

# How do we align the existing prices in adj_close with our new set of dates?
# All we need to do is reindex adj_close using all_weekdays as the new index
adj_close = adj_close.reindex(all_weekdays)

# Reindexing will insert missing values (NaN) for the dates that were not present
# in the original set. To cope with this, we can fill the missing by replacing them
# with the latest available price for each instrument.
adj_close = adj_close.fillna(method='ffill')
adj_close.describe()

Out[3]:

	MAC	ROIC	SKT	SPG	TCO
count	282.000000	282.000000	282.000000	282.000000	282.000000
mean	60.359130	19.702806	27.499412	163.018773	59.680361
std	4.765473	0.695494	3.795979	7.915956	7.001413
min	52.056969	17.806831	21.890348	148.815491	45.844936
25%	56.158371	19.218705	24.376531	156.912205	54.580725
50%	60.819279	19.729540	25.674796	161.862656	59.257576
75%	64.790953	20.204208	31.491758	168.811760	64.934494
max	69.344276	21.429085	34.961044	180.091019	73.055801

In [1]:

# Define your own bins
mybins = range(df.MAC.min(), df.MAC.max(), 10)

# Cut the data with the help of the bins
df['MAC_bucket'] = pd.cut(df.MAC, bins=mybins)

# Count the number of values per bucket
df['MAC_bucket'].value_counts()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-5cc82bb1cc93> in <module>()
      1 # Define your own bins
----> 2 mybins = range(df.MAC.min(), df.MAC.max(), 10)
      3 
      4 # Cut the data with the help of the bins
      5 df['MAC_bucket'] = pd.cut(df.MAC, bins=mybins)

NameError: name 'df' is not defined