import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Simulate 10 columns of 100 draws each from a normal distribution with
# mean 1.0 and standard deviation 0.03.
returns = pd.DataFrame(np.random.normal(1.0, 0.03, (100, 10)))
# The running product down each column turns the draws into a price path.
prices = returns.cumprod()
prices.plot()
plt.title('Randomly-generated Prices')
plt.xlabel('Time')
plt.ylabel('Price')
# loc=0 is matplotlib's "best" setting: the legend position is chosen automatically.
plt.legend(loc=0);

# A small Series with one missing value; the NaN makes the whole Series
# float64 even though the other entries are written as integers.
s = pd.Series([1, 2, np.nan, 4, 5])
print(s)

0    1.0
1    2.0
2    NaN
3    4.0
4    5.0
dtype: float64

print(s.name)

None

s.name = "Toy Series"
print(s.name)

Toy Series

print(s.index)

RangeIndex(start=0, stop=5, step=1)

new_index = pd.date_range("2016-01-01", periods=len(s), freq="D")
print(new_index)

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05'],
              dtype='datetime64[ns]', freq='D')

s.index = new_index
print(s.index)

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05'],
              dtype='datetime64[ns]', freq='D')

# Positional access with iloc: position 0 is the first element and
# position -1 is the last one, whatever the length of the Series.
print("First element of the series:", s.iloc[0])
print("Last element of the series:", s.iloc[-1])

First element of the series: 1.0
Last element of the series: 5.0

s.iloc[:2]

2016-01-01    1.0
2016-01-02    2.0
Freq: D, Name: Toy Series, dtype: float64

# Slice bounds expressed as variables: begin at the first position, move
# one element at a time, and stop at the last position.
start = 0
end = len(s) - 1
step = 1

# The stop position is exclusive, so this slice leaves out the last element.
s.iloc[start:end:step]

2016-01-01    1.0
2016-01-02    2.0
2016-01-03    NaN
2016-01-04    4.0
Freq: D, Name: Toy Series, dtype: float64

s.iloc[::-1]

2016-01-05    5.0
2016-01-04    4.0
2016-01-03    NaN
2016-01-02    2.0
2016-01-01    1.0
Freq: -1D, Name: Toy Series, dtype: float64

s.iloc[-2:-4:-1]

2016-01-04    4.0
2016-01-03    NaN
Freq: -1D, Name: Toy Series, dtype: float64

s.loc['2016-01-01']

1.0

s.loc['2016-01-02':'2016-01-04']

2016-01-02    2.0
2016-01-03    NaN
2016-01-04    4.0
Freq: D, Name: Toy Series, dtype: float64

print(s < 3)

2016-01-01     True
2016-01-02     True
2016-01-03    False
2016-01-04    False
2016-01-05    False
Freq: D, Name: Toy Series, dtype: bool

print(s.loc[s < 3])

2016-01-01    1.0
2016-01-02    2.0
Freq: D, Name: Toy Series, dtype: float64

print(s.loc[(s < 3) & (s > 1)])

2016-01-02    2.0
Freq: D, Name: Toy Series, dtype: float64

from quantrocket.master import get_securities
# Look up the security with symbol XOM, asking only for the Sid, Symbol and
# Exchange fields.
# NOTE(review): presumably vendors='usstock' limits the lookup to that data
# vendor's listings - confirm against the QuantRocket documentation.
securities = get_securities(symbols='XOM', fields=['Sid','Symbol','Exchange'], vendors='usstock')
securities

from quantrocket import get_prices
# The first index label of the lookup result is used as XOM's identifier.
XOM = securities.index[0]
start = "2012-01-01"
end = "2016-01-01"
# NOTE(review): the database code is named "usstock-free-1min" while
# data_frequency="daily" is passed - presumably this requests daily bars from
# that database; confirm against the QuantRocket documentation.
prices = get_prices("usstock-free-1min", data_frequency="daily", sids=XOM, start_date=start, end_date=end, fields="Close")
# Select the "Close" rows, then the XOM column, leaving a single Series.
prices = prices.loc["Close"][XOM]

print(type(prices))
prices.head(5)

<class 'pandas.core.series.Series'>

Date
2012-01-03    76.760
2012-01-04    76.778
2012-01-05    76.546
2012-01-06    75.975
2012-01-09    76.314
Name: FIBBG000GZQ728, dtype: float64

print('Old name:', prices.name)
prices.name = "XOM"
print('New name:', prices.name)

Old name: FIBBG000GZQ728
New name: XOM

print(prices.index)
print("tz:", prices.index.tz)

DatetimeIndex(['2012-01-03', '2012-01-04', '2012-01-05', '2012-01-06',
               '2012-01-09', '2012-01-10', '2012-01-11', '2012-01-12',
               '2012-01-13', '2012-01-17',
               ...
               '2015-12-17', '2015-12-18', '2015-12-21', '2015-12-22',
               '2015-12-23', '2015-12-24', '2015-12-28', '2015-12-29',
               '2015-12-30', '2015-12-31'],
              dtype='datetime64[ns]', name='Date', length=1006, freq=None)
tz: None

# Downsample to monthly frequency, keeping the last observation of each month.
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of
# 'ME' - confirm against the pandas version this notebook runs on.
monthly_prices = prices.resample('M').last()
monthly_prices.head(10)

Date
2012-01-31    74.743
2012-02-29    77.629
2012-03-31    77.835
2012-04-30    77.485
2012-05-31    71.052
2012-06-30    77.323
2012-07-31    78.480
2012-08-31    79.399
2012-09-30    83.174
2012-10-31    82.919
Freq: M, Name: XOM, dtype: float64

monthly_prices_med = prices.resample('M').median()
monthly_prices_med.head(10)

Date
2012-01-31    76.5770
2012-02-29    76.6105
2012-03-31    77.2565
2012-04-30    76.6195
2012-05-31    74.2285
2012-06-30    74.1970
2012-07-31    77.2330
2012-08-31    79.7990
2012-09-30    82.9740
2012-10-31    83.4190
Freq: M, Name: XOM, dtype: float64

def custom_resampler(array_like):
    """Return the first value of a resampling period.

    Parameters
    ----------
    array_like : pandas.Series
        The observations that fall inside one resampling bin. It is read
        positionally through ``.iloc``.

    Returns
    -------
    The first observation of the bin, or ``NaN`` when the bin is empty.
    """
    # A bin without observations (for example a gap in the data) has no first
    # element; return NaN instead of letting ``.iloc[0]`` raise IndexError.
    # ``len`` is used because the truth value of a Series is ambiguous.
    if len(array_like) == 0:
        return np.nan
    return array_like.iloc[0]

first_of_month_prices = prices.resample('M').apply(custom_resampler)
first_of_month_prices.head(10)

Date
2012-01-31    76.760
2012-02-29    74.948
2012-03-31    77.925
2012-04-30    78.140
2012-05-31    78.114
2012-06-30    70.411
2012-07-31    77.115
2012-08-31    78.534
2012-09-30    79.236
2012-10-31    83.492
Freq: M, Name: XOM, dtype: float64

# Attach the America/New_York timezone to the index, which was timezone-naive
# until now; the timestamps keep their wall-clock values and gain a UTC offset.
eastern_prices = prices.tz_localize('America/New_York')
eastern_prices.head(10)

Date
2012-01-03 00:00:00-05:00    76.760
2012-01-04 00:00:00-05:00    76.778
2012-01-05 00:00:00-05:00    76.546
2012-01-06 00:00:00-05:00    75.975
2012-01-09 00:00:00-05:00    76.314
2012-01-10 00:00:00-05:00    76.510
2012-01-11 00:00:00-05:00    75.939
2012-01-12 00:00:00-05:00    75.635
2012-01-13 00:00:00-05:00    75.760
2012-01-17 00:00:00-05:00    76.483
Name: XOM, dtype: float64

calendar_dates = pd.date_range(start=start, end=end, freq='D')
print(calendar_dates)

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',
               '2012-01-09', '2012-01-10',
               ...
               '2015-12-23', '2015-12-24', '2015-12-25', '2015-12-26',
               '2015-12-27', '2015-12-28', '2015-12-29', '2015-12-30',
               '2015-12-31', '2016-01-01'],
              dtype='datetime64[ns]', length=1462, freq='D')

# Reindex onto every calendar day, carrying the last known price forward over
# days without an observation. Days before the first observation have nothing
# to carry forward and therefore stay NaN.
calendar_prices = prices.reindex(calendar_dates, method='ffill')
calendar_prices.head(15)

2012-01-01       NaN
2012-01-02       NaN
2012-01-03    76.760
2012-01-04    76.778
2012-01-05    76.546
2012-01-06    75.975
2012-01-07    75.975
2012-01-08    75.975
2012-01-09    76.314
2012-01-10    76.510
2012-01-11    75.939
2012-01-12    75.635
2012-01-13    75.760
2012-01-14    75.760
2012-01-15    75.760
Freq: D, Name: XOM, dtype: float64

# Replace the remaining NaNs with the mean of the whole series. The mean is
# taken over the full period, so each filled value also depends on prices
# dated after the gap it fills.
meanfilled_prices = calendar_prices.fillna(calendar_prices.mean())
meanfilled_prices.head(10)

2012-01-01    84.152682
2012-01-02    84.152682
2012-01-03    76.760000
2012-01-04    76.778000
2012-01-05    76.546000
2012-01-06    75.975000
2012-01-07    75.975000
2012-01-08    75.975000
2012-01-09    76.314000
2012-01-10    76.510000
Freq: D, Name: XOM, dtype: float64

bfilled_prices = calendar_prices.bfill()
bfilled_prices.head(10)

2012-01-01    76.760
2012-01-02    76.760
2012-01-03    76.760
2012-01-04    76.778
2012-01-05    76.546
2012-01-06    75.975
2012-01-07    75.975
2012-01-08    75.975
2012-01-09    76.314
2012-01-10    76.510
Freq: D, Name: XOM, dtype: float64

dropped_prices = calendar_prices.dropna()
dropped_prices.head(10)

2012-01-03    76.760
2012-01-04    76.778
2012-01-05    76.546
2012-01-06    75.975
2012-01-07    75.975
2012-01-08    75.975
2012-01-09    76.314
2012-01-10    76.510
2012-01-11    75.939
2012-01-12    75.635
Freq: D, Name: XOM, dtype: float64

prices.plot();
# We still need to add the axis labels and title ourselves
plt.title("XOM Prices")
plt.ylabel("Price")
plt.xlabel("Date");

print("Mean:", prices.mean())
print("Standard deviation:", prices.std())

Mean: 84.12831908548708
Standard deviation: 6.59102021142679

print("Summary Statistics")
print(prices.describe())

Summary Statistics
count    1006.000000
mean       84.128319
std         6.591020
min        68.116000
25%        79.783250
50%        82.990000
75%        88.993250
max        99.502000
Name: XOM, dtype: float64

modified_prices = prices * 2 - 10
modified_prices.head(5)

Date
2012-01-03    143.520
2012-01-04    143.556
2012-01-05    143.092
2012-01-06    141.950
2012-01-09    142.628
Name: XOM, dtype: float64

# The noise Series is built on the index of `prices`, so the addition lines up
# label by label; the noise is scaled by 5 and a constant 20 is added on top.
noisy_prices = prices + 5 * pd.Series(np.random.normal(0, 5, len(prices)), index=prices.index) + 20
noisy_prices.head(5)

Date
2012-01-03     67.168340
2012-01-04    101.895873
2012-01-05     96.558114
2012-01-06    140.748484
2012-01-09     89.780228
dtype: float64

# This noise Series is created without an index argument, so it gets the
# default integer index. None of its labels match the dates in `prices`,
# which is why every entry of the sum is NaN.
empty_series = prices + pd.Series(np.random.normal(0, 1, len(prices)))
empty_series.head(5)

2012-01-03 00:00:00   NaN
2012-01-04 00:00:00   NaN
2012-01-05 00:00:00   NaN
2012-01-06 00:00:00   NaN
2012-01-09 00:00:00   NaN
dtype: float64

# Both results start with a NaN because the first price has no predecessor;
# the explicit positional slice drops that first row.
add_returns = prices.diff().iloc[1:]
mult_returns = prices.pct_change().iloc[1:]

plt.title("Multiplicative returns of XOM")
plt.xlabel("Date")
plt.ylabel("Percent Returns")
mult_returns.plot();

rolling_mean = prices.rolling(30).mean()
rolling_mean.name = "30-day rolling mean"

prices.plot()
rolling_mean.plot()
plt.title("XOM Price")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend();

rolling_std = prices.rolling(30).std()
rolling_std.name = "30-day rolling volatility"

rolling_std.plot()
plt.title(rolling_std.name);
plt.xlabel("Date")
plt.ylabel("Standard Deviation");

print(np.median(mult_returns))

-0.00031201248049916863

print(mult_returns.median())

-0.00031201248049916863

dict_data = {
    'a' : [1, 2, 3, 4, 5],
    'b' : ['L', 'K', 'J', 'M', 'Z'],
    'c' : np.random.normal(0, 1, 5)
}
print(dict_data)

{'a': [1, 2, 3, 4, 5], 'b': ['L', 'K', 'J', 'M', 'Z'], 'c': array([-0.96525558, -0.17670317,  1.52158697, -0.55480652, -1.42119632])}

frame_data = pd.DataFrame(dict_data, index=pd.date_range('2016-01-01', periods=5))
print(frame_data)

            a  b         c
2016-01-01  1  L -0.965256
2016-01-02  2  K -0.176703
2016-01-03  3  J  1.521587
2016-01-04  4  M -0.554807
2016-01-05  5  Z -1.421196

# Build two named Series and place them side by side; each Series name
# becomes the label of its column in the resulting DataFrame.
s_1 = pd.Series(data=[2, 4, 6, 8, 10], name="Evens")
s_2 = pd.Series(data=[1, 3, 5, 7, 9], name="Odds")
numbers = pd.concat((s_1, s_2), axis="columns")
print(numbers)

   Evens  Odds
0      2     1
1      4     3
2      6     5
3      8     7
4     10     9

print(numbers.columns)

Index(['Evens', 'Odds'], dtype='object')

numbers.columns = ['Shmevens', 'Shmodds']
print(numbers)

   Shmevens  Shmodds
0         2        1
1         4        3
2         6        5
3         8        7
4        10        9

print(numbers.index)

RangeIndex(start=0, stop=5, step=1)

numbers.index = pd.date_range("2016-01-01", periods=len(numbers))
print(numbers)

            Shmevens  Shmodds
2016-01-01         2        1
2016-01-02         4        3
2016-01-03         6        5
2016-01-04         8        7
2016-01-05        10        9

numbers.values

array([[ 2,  1],
       [ 4,  3],
       [ 6,  5],
       [ 8,  7],
       [10,  9]])

type(numbers.values)

numpy.ndarray

securities = get_securities(symbols=['XOM', 'JNJ', 'MON', 'KKD'], vendors='usstock')
securities

start = "2012-01-01"
end = "2017-01-01"

prices = get_prices("usstock-free-1min", data_frequency="daily", sids=securities.index.tolist(), start_date=start, end_date=end, fields="Close")
prices = prices.loc["Close"]
prices.head()

sids_to_symbols = securities.Symbol.to_dict()
prices = prices.rename(columns=sids_to_symbols)
prices.head()

prices.XOM.head()

Date
2012-01-03    74.141
2012-01-04    74.159
2012-01-05    73.934
2012-01-06    73.383
2012-01-09    73.710
Name: XOM, dtype: float64

prices["XOM"].head()

Date
2012-01-03    74.141
2012-01-04    74.159
2012-01-05    73.934
2012-01-06    73.383
2012-01-09    73.710
Name: XOM, dtype: float64

prices.loc[:, 'XOM'].head()

Date
2012-01-03    74.141
2012-01-04    74.159
2012-01-05    73.934
2012-01-06    73.383
2012-01-09    73.710
Name: XOM, dtype: float64

print(type(prices.XOM))
print(type(prices.loc[:, 'XOM']))

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>

prices.loc[:, ['XOM', 'JNJ']].head()

prices.loc['2015-12-15':'2015-12-22']

prices.loc['2015-12-15':'2015-12-22', ['XOM', 'JNJ']]

prices.iloc[0:2, 1]

Date
2012-01-03    56.665
2012-01-04    56.321
Name: JNJ, dtype: float64

# Select the rows at the odd positions 1, 3, 5, ..., 99 together with the
# columns at positions 0 and 2, then show the first 20 of those rows.
prices.iloc[list(range(1, 100, 2)), [0, 2]].head(20)

prices.loc[prices.MON > prices.JNJ].head()

prices.loc[(prices.MON > prices.JNJ) & ~(prices.XOM > 66)].head()

securities = get_securities(symbols="AAPL", vendors='usstock')
securities

AAPL = securities.index[0]

# Fetch the AAPL closing prices as a single Series (same field and column
# selection as before), then add it to the frame as a new column labelled
# with the AAPL Sid.
s_1 = get_prices("usstock-free-1min", data_frequency="daily", sids=AAPL, start_date=start, end_date=end, fields='Close').loc["Close"][AAPL]
prices.loc[:, AAPL] = s_1
prices.head(5)

prices = prices.drop(AAPL, axis=1)
prices.head(5)

prices.plot()
plt.title("Collected Stock Prices")
plt.ylabel("Price")
plt.xlabel("Date");

prices.mean(axis=0)

Sid
MON    97.354828
JNJ    87.628015
KKD    15.453603
XOM    81.975611
dtype: float64

prices.std(axis=0)

Sid
MON    13.113086
JNJ    18.909646
KKD     5.016813
XOM     6.284654
dtype: float64

prices.describe()

(2 * prices - 50).head(5)

# Percent change of every column from one row to the next; the first row has
# no predecessor, so it is dropped with an explicit positional slice.
mult_returns = prices.pct_change().iloc[1:]
mult_returns.head()

# Standardise each column: subtract the column mean and divide by the column
# standard deviation.
norm_returns = (mult_returns - mult_returns.mean(axis=0))/mult_returns.std(axis=0)
# Plot only the rows labelled from 2014-01-01 through 2015-01-01.
norm_returns.loc['2014-01-01':'2015-01-01'].plot();

# Average of each column over a sliding window of 30 rows. The result already
# carries the column labels of `prices`, so they are not assigned again.
rolling_mean = prices.rolling(30).mean()

rolling_mean.plot()
plt.title("Rolling Mean of Prices")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend();

	Symbol	Exchange	Country	Currency	SecType	Etf	Timezone	Name	PriceMagnifier	Multiplier	Delisted	DateDelisted	LastTradeDate	RolloverDate
Sid
FIBBG000BFWKC0	MON	XNYS	US	USD	STK	False	America/New_York	MONSANTO CO	1	1	True	2018-06-06	NaT	NaT
FIBBG000BMHYD1	JNJ	XNYS	US	USD	STK	False	America/New_York	JOHNSON & JOHNSON	1	1	False	NaT	NaT	NaT
FIBBG000CK38G3	KKD	XNYS	US	USD	STK	False	America/New_York	KRISPY KREME DOUGHNUTS INC	1	1	True	2016-07-27	NaT	NaT
FIBBG000GZQ728	XOM	XNYS	US	USD	STK	False	America/New_York	EXXON MOBIL CORP	1	1	False	NaT	NaT	NaT
FIBBG00YPSJ318	MON	XNAS	US	USD	STK	False	America/New_York	MONUMENT CIRCLE ACQUI-CL A	1	1	True	2022-12-23	NaT	NaT

Sid	FIBBG000BFWKC0	FIBBG000BMHYD1	FIBBG000CK38G3	FIBBG000GZQ728
Date
2012-01-03	66.096	56.665	6.52	74.141
2012-01-04	67.046	56.321	6.36	74.159
2012-01-05	70.746	56.252	6.42	73.934
2012-01-06	71.512	55.762	7.12	73.383
2012-01-09	72.425	55.848	7.31	73.710

Sid	MON	JNJ	KKD	XOM
Date
2015-12-15	93.491	101.268	15.12	76.720
2015-12-16	94.928	102.357	15.09	76.450
2015-12-17	93.608	100.791	14.86	75.300
2015-12-18	92.728	99.148	14.81	74.644
2015-12-21	93.305	98.788	14.98	74.624
2015-12-22	95.143	99.887	14.96	75.001

Sid	XOM	JNJ
Date
2015-12-15	76.720	101.268
2015-12-16	76.450	102.357
2015-12-17	75.300	100.791
2015-12-18	74.644	99.148
2015-12-21	74.624	98.788
2015-12-22	75.001	99.887

Sid	MON	KKD
Date
2012-01-04	67.046	6.36
2012-01-06	71.512	7.12
2012-01-10	73.071	7.19
2012-01-12	74.058	7.35
2012-01-17	74.399	6.96
2012-01-19	74.132	7.01
2012-01-23	73.707	6.91
2012-01-25	75.027	7.03
2012-01-27	74.298	7.65
2012-01-31	75.700	7.33
2012-02-02	75.774	7.68
2012-02-06	73.486	7.94
2012-02-08	73.015	8.03
2012-02-10	71.355	8.00
2012-02-14	70.986	8.33
2012-02-16	72.895	8.58
2012-02-21	73.237	8.61
2012-02-23	71.761	8.64
2012-02-27	72.739	7.99
2012-02-29	71.392	8.18

Introduction to pandas

pandas Data Structures

`Series`

Accessing `Series` Elements

Boolean Indexing

Indexing and Time Series

Missing Data

Time Series Analysis with pandas

`DataFrames`

Accessing `DataFrame` elements

Boolean indexing

Adding, Removing Columns, Combining `DataFrames`/`Series`

Time Series Analysis with pandas

Next Steps

	Symbol	Exchange	Country	Currency	SecType	Etf	Timezone	Name	PriceMagnifier	Multiplier	Delisted	DateDelisted	LastTradeDate	RolloverDate
Sid
FIBBG000B9XRY4	AAPL	XNAS	US	USD	STK	False	America/New_York	APPLE INC	1	1	False	NaT	NaT	NaT

Sid	MON	JNJ	KKD	XOM
count	1258.000000	1258.000000	1149.000000	1258.000000
mean	97.354828	87.628015	15.453603	81.975611
std	13.113086	18.909646	5.016813	6.284654
min	64.720000	54.130000	5.900000	65.792000
25%	88.425250	73.747500	13.000000	77.642500
50%	99.240500	93.021500	16.820000	81.023000
75%	107.470500	98.883750	19.320000	86.814500
max	120.811000	123.714000	26.510000	96.107000

Sid	MON	JNJ	KKD	XOM
Date
2012-01-03	82.192	63.330	-36.96	98.282
2012-01-04	84.092	62.642	-37.28	98.318
2012-01-05	91.492	62.504	-37.16	97.868
2012-01-06	93.024	61.524	-35.76	96.766
2012-01-09	94.850	61.696	-35.38	97.420

Sid	MON	JNJ	KKD	XOM
Date
2012-01-04	0.014373	-0.006071	-0.024540	0.000243
2012-01-05	0.055186	-0.001225	0.009434	-0.003034
2012-01-06	0.010827	-0.008711	0.109034	-0.007453
2012-01-09	0.012767	0.001542	0.026685	0.004456
2012-01-10	0.008920	0.004154	-0.016416	0.002578

Introduction to pandas

pandas Data Structures

Series

Accessing Series Elements

Boolean Indexing

Indexing and Time Series

Missing Data

Time Series Analysis with pandas

DataFrames

Accessing DataFrame elements

Boolean indexing

Adding, Removing Columns, Combining DataFrames/Series

Time Series Analysis with pandas

Next Steps

`Series`

Accessing `Series` Elements

`DataFrames`

Accessing `DataFrame` elements

Adding, Removing Columns, Combining `DataFrames`/`Series`