A factor is a function from an asset and a moment in time to a number.
F(asset, timestamp) -> float
In Pipeline, Factors are the most commonly used kind of term, representing the result of any computation that produces a numerical result. Factors require a column of data and a window length as input.
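As a toy sketch in plain Python (an illustration of the abstraction only, not the Pipeline API; the function name and body are hypothetical):

# A factor maps an (asset, timestamp) pair to a number.
def my_factor(asset, timestamp):
    """Hypothetical: return some numerical value for asset as of timestamp."""
    ...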
The simplest factors in Pipeline are built-in Factors. Built-in Factors are pre-built to perform common computations. As a first example, let's make a factor to compute the average close price over the last 10 days. We can use the SimpleMovingAverage built-in factor, which computes the average value of the input data (close price) over the specified window length (10 days). To do this, we need to import our built-in SimpleMovingAverage factor and the EquityPricing dataset.
# New from the last lesson, import the EquityPricing dataset.
from zipline.pipeline import Pipeline, EquityPricing
from zipline.research import run_pipeline
# New from the last lesson, import the built-in SimpleMovingAverage factor.
from zipline.pipeline.factors import SimpleMovingAverage
To see the full list of built-in factors, click on the factors module in the above import statement and press Control, or see the API Reference.
Let's go back to our make_pipeline function from the previous lesson and instantiate a SimpleMovingAverage factor. To create a SimpleMovingAverage factor, we call the SimpleMovingAverage constructor with two arguments: inputs, which must be a list of BoundColumn objects, and window_length, which must be an integer indicating how many days' worth of data our moving average calculation should receive. (We'll discuss BoundColumn in more depth later; for now we just need to know that a BoundColumn is an object indicating what kind of data should be passed to our factor.)
The following line creates a Factor for computing the 10-day mean close price of securities.
mean_close_10 = SimpleMovingAverage(inputs=[EquityPricing.close], window_length=10)
It's important to note that creating the factor does not actually perform a computation. Creating a factor is like defining a function: to perform the computation, we need to add the factor to our pipeline and run it.
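The analogy in plain Python (a rough sketch, not Pipeline internals; ten_day_mean is a hypothetical function):

# Defining this function performs no computation by itself...
def ten_day_mean(closes):
    return sum(closes[-10:]) / 10

# ...work happens only when it is called, just as a factor computes only
# when its pipeline runs.
ten_day_mean([30.1, 30.4, 30.2, 30.8, 31.0, 30.9, 31.2, 31.1, 30.7, 31.3])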
Let's update our original empty pipeline to make it compute our new moving average factor. To start, let's move our factor instantiation into make_pipeline. Next, we can tell our pipeline to compute our factor by passing it a columns argument, which should be a dictionary mapping column names to factors, filters, or classifiers. Our updated make_pipeline function should look something like this:
def make_pipeline():
    mean_close_10 = SimpleMovingAverage(inputs=[EquityPricing.close], window_length=10)
    return Pipeline(
        columns={
            '10_day_mean_close': mean_close_10
        }
    )
To see what this looks like, let's make our pipeline, run it, and display the result.
result = run_pipeline(make_pipeline(), start_date='2010-01-05', end_date='2010-01-05')
result
| date       | asset                          | 10_day_mean_close |
|------------|--------------------------------|-------------------|
| 2010-01-05 | Equity(FIBBG000C2V3D6 [A])     | 30.432000         |
|            | Equity(QI000000004076 [AABA])  | 16.605000         |
|            | Equity(FIBBG000BZWHH8 [AACC])  | 6.434000          |
|            | Equity(FIBBG000V2S3P6 [AACG])  | 4.501444          |
|            | Equity(FIBBG000M7KQ09 [AAI])   | 5.250000          |
| ...        | ...                            | ...               |
|            | Equity(FIBBG011MC2100 [AATC])  | 11.980500         |
|            | Equity(FIBBG000GDBDH4 [BDG])   | NaN               |
|            | Equity(FIBBG000008NR0 [ISM])   | NaN               |
|            | Equity(FIBBG000GZ24W8 [PEM])   | NaN               |
|            | Equity(FIBBG000BB5S87 [HCH])   | 106.570000        |
7841 rows × 1 columns
Now we have a column in our pipeline output with the 10-day average close price for all ~8000 securities (display truncated). Note that each row corresponds to the result of our computation for a given security on a given date. The DataFrame has a MultiIndex whose first level is a datetime representing the date of the computation and whose second level is an Equity object corresponding to the security.
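Because the output is an ordinary pandas DataFrame, we can use standard MultiIndex operations on it. A quick sketch using the result variable from above:

# The two index levels: computation dates and Equity objects
result.index.get_level_values('date').unique()
result.index.get_level_values('asset')

# Select the cross-section of results for a single date
result.loc['2010-01-05'].head()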
If we run our pipeline over more than one day, the output looks like this.
result = run_pipeline(make_pipeline(), start_date='2010-01-05', end_date='2010-01-07')
result
| date       | asset                          | 10_day_mean_close |
|------------|--------------------------------|-------------------|
| 2010-01-05 | Equity(FIBBG000C2V3D6 [A])     | 30.432000         |
|            | Equity(QI000000004076 [AABA])  | 16.605000         |
|            | Equity(FIBBG000BZWHH8 [AACC])  | 6.434000          |
|            | Equity(FIBBG000V2S3P6 [AACG])  | 4.501444          |
|            | Equity(FIBBG000M7KQ09 [AAI])   | 5.250000          |
| ...        | ...                            | ...               |
| 2010-01-07 | Equity(FIBBG011MC2100 [AATC])  | 11.816000         |
|            | Equity(FIBBG000GDBDH4 [BDG])   | NaN               |
|            | Equity(FIBBG000008NR0 [ISM])   | NaN               |
|            | Equity(FIBBG000GZ24W8 [PEM])   | NaN               |
|            | Equity(FIBBG000BB5S87 [HCH])   | 109.796667        |
23534 rows × 1 columns
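With multiple dates in the index, standard pandas operations also let us view a single security across dates. A quick sketch (the asset is taken from the output's own index):

# Pull one security's time series of factor values across all dates
asset = result.index.get_level_values('asset')[0]
result.xs(asset, level='asset')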
Note: factors can also be added to an existing Pipeline instance using the Pipeline.add method. Using add looks something like this:
my_pipe = Pipeline()
f1 = SomeFactor(...)
my_pipe.add(f1, 'f1')
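For instance, to add the moving average factor from earlier to an empty pipeline:

my_pipe = Pipeline()
mean_close_10 = SimpleMovingAverage(inputs=[EquityPricing.close], window_length=10)
# Register the factor as an output column named '10_day_mean_close'
my_pipe.add(mean_close_10, '10_day_mean_close')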
The most commonly used built-in Factor is Latest. The Latest factor gets the most recent value of a given data column. This factor is common enough that it is instantiated differently from other factors: the best way to get the latest value of a data column is via its .latest attribute. As an example, let's update make_pipeline to create a latest close price factor and add it to our pipeline:
def make_pipeline():
    mean_close_10 = SimpleMovingAverage(inputs=[EquityPricing.close], window_length=10)
    latest_close = EquityPricing.close.latest
    return Pipeline(
        columns={
            '10_day_mean_close': mean_close_10,
            'latest_close_price': latest_close
        }
    )
And now, when we make and run our pipeline again, there are two columns in our output DataFrame. One column has the 10-day mean close price of each security, and the other has the latest close price.
result = run_pipeline(make_pipeline(), start_date='2010-01-05', end_date='2010-01-05')
result.head(5)
| date       | asset                          | 10_day_mean_close | latest_close_price |
|------------|--------------------------------|-------------------|--------------------|
| 2010-01-05 | Equity(FIBBG000C2V3D6 [A])     | 30.432000         | 31.300             |
|            | Equity(QI000000004076 [AABA])  | 16.605000         | 17.100             |
|            | Equity(FIBBG000BZWHH8 [AACC])  | 6.434000          | 7.150              |
|            | Equity(FIBBG000V2S3P6 [AACG])  | 4.501444          | 4.702              |
|            | Equity(FIBBG000M7KQ09 [AAI])   | 5.250000          | 5.180              |
.latest can sometimes return things other than Factors. We'll see examples of other possible return types in later lessons.
Some factors have default inputs that should never be changed. For example, the VWAP built-in factor is always calculated from EquityPricing.close and EquityPricing.volume. When a factor is always calculated from the same BoundColumns, we can call the constructor without specifying inputs.
from zipline.pipeline.factors import VWAP
vwap = VWAP(window_length=10)
When choosing a start_date for run_pipeline, there are two gotchas to keep in mind. First, the earliest start_date you can specify is one day after the start date of the bundle. This is because the start_date you pass to run_pipeline indicates the first date you want to include in the pipeline output, and each day's pipeline output is based on the previous day's data. The purpose of this one-day lag is to avoid lookahead bias: pipeline output tells you what you would have known at the start of each day, based on the previous day's data.
The learning bundle starts on 2007-01-03 (the first trading day of 2007), but if we try to run a pipeline that starts on (or before) that date, we'll get an error that tells us to start one day after the bundle start date:
result = run_pipeline(Pipeline(), start_date='2007-01-03', end_date='2007-01-03')
---------------------------------------------------------------------------
ValidationError                           Traceback (most recent call last)
Cell In[9], line 1
----> 1 result = run_pipeline(Pipeline(), start_date='2007-01-03', end_date='2007-01-03')

File /opt/conda/lib/python3.11/site-packages/zipline/research/pipeline.py:95, in run_pipeline(pipeline, start_date, end_date, bundle)
     36 def run_pipeline(
     37     pipeline: Pipeline,
     38     start_date: str,
     39     end_date: str = None,
     40     bundle: str = None
     41 ) -> pd.DataFrame:
     42     """
     43     Compute values for pipeline from start_date to end_date, using the specified
     44     bundle or the default bundle.
   (...)
     93     factor = run_pipeline(pipeline, '2018-01-01', '2019-02-01', bundle="usstock-1min")
     94     """
---> 95     return _run_pipeline(
     96         pipeline,
     97         start_date=start_date,
     98         end_date=end_date,
     99         bundle=bundle)

File /opt/conda/lib/python3.11/site-packages/zipline/research/pipeline.py:149, in _run_pipeline(pipeline, start_date, end_date, bundle, mask)
    147 second_session = exchange_calendar.next_session(first_session)
    148 if start_date < second_session:
--> 149     raise ValidationError(
    150         f"start_date cannot be earlier than {second_session.date().isoformat()} "
    151         f"for this bundle (one session after the bundle start date of {first_session.date().isoformat()})")
    153 # Roll-forward start_date to valid session
    154 for i in range(100):

ValidationError: start_date cannot be earlier than 2007-01-04 for this bundle (one session after the bundle start date of 2007-01-03)
The second gotcha to keep in mind is that the start_date you choose must also make allowance for the window_length of your factors. The following pipeline includes a 10-day VWAP factor, so if we set the start_date to 2007-01-04 (as suggested by the previous error message), we will get a new error (scroll to the bottom of the traceback for the useful error message):
pipeline = Pipeline(
    columns={
        "vwap": VWAP(window_length=10)
    }
)
result = run_pipeline(pipeline, start_date='2007-01-04', end_date='2007-01-04')
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File index.pyx:598, in pandas._libs.index.DatetimeEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:2606, in pandas._libs.hashtable.Int64HashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:2630, in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 1166400000000000000

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/base.py:3790, in Index.get_loc(self, key)
   3789 try:
-> 3790     return self._engine.get_loc(casted_key)
   3791 except KeyError as err:

File index.pyx:566, in pandas._libs.index.DatetimeEngine.get_loc()

File index.pyx:600, in pandas._libs.index.DatetimeEngine.get_loc()

KeyError: Timestamp('2006-12-18 00:00:00')

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:631, in DatetimeIndex.get_loc(self, key)
    630 try:
--> 631     return Index.get_loc(self, key)
    632 except KeyError as err:

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/base.py:3797, in Index.get_loc(self, key)
   3796     raise InvalidIndexError(key)
-> 3797     raise KeyError(key) from err
   3798 except TypeError:
   3799     # If we have a listlike key, _check_indexing_error will raise
   3800     # InvalidIndexError. Otherwise we fall through and re-raise
   3801     # the TypeError.

KeyError: Timestamp('2006-12-18 00:00:00')

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/zipline/data/bcolz_daily_bars.py:578, in BcolzDailyBarReader._load_raw_arrays_date_to_index(self, date)
    577 try:
--> 578     return self.sessions.get_loc(date)
    579 except KeyError:

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:633, in DatetimeIndex.get_loc(self, key)
    632 except KeyError as err:
--> 633     raise KeyError(orig_key) from err

KeyError: Timestamp('2006-12-18 00:00:00')

During handling of the above exception, another exception occurred:

NoDataOnDate                              Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/zipline/pipeline/engine.py:763, in SimplePipelineEngine.compute_chunk(self, graph, dates, sids, workspace, refcounts, execution_order, hooks)
    762 try:
--> 763     loaded = loader.load_adjusted_array(
    764         domain, to_load, mask_dates, sids, mask,
    765     )
    766 except NoDataOnDate as e:

File /opt/conda/lib/python3.11/site-packages/zipline/pipeline/loaders/equity_pricing_loader.py:90, in EquityPricingLoader.load_adjusted_array(***failed resolving arguments***)
     88 ohlcv_colnames = [c.name for c in ohlcv_cols]
---> 90 raw_ohlcv_arrays = self.raw_price_reader.load_raw_arrays(
     91     ohlcv_colnames,
     92     shifted_dates[0],
     93     shifted_dates[-1],
     94     sids,
     95 )
     97 # Currency convert raw_arrays in place if necessary. We use shifted
     98 # dates to load currency conversion rates to make them line up with
     99 # dates used to fetch prices.

File /opt/conda/lib/python3.11/site-packages/zipline/data/bcolz_daily_bars.py:557, in BcolzDailyBarReader.load_raw_arrays(self, columns, start_date, end_date, assets)
    556 def load_raw_arrays(self, columns, start_date, end_date, assets):
--> 557     start_idx = self._load_raw_arrays_date_to_index(start_date)
    558     end_idx = self._load_raw_arrays_date_to_index(end_date)

File /opt/conda/lib/python3.11/site-packages/zipline/data/bcolz_daily_bars.py:580, in BcolzDailyBarReader._load_raw_arrays_date_to_index(self, date)
    579 except KeyError:
--> 580     raise NoDataOnDate(date)

NoDataOnDate: 2006-12-18 00:00:00

During handling of the above exception, another exception occurred:

NoDataOnDate                              Traceback (most recent call last)
Cell In[10], line 7
      1 pipeline = Pipeline(
      2     columns={
      3         "vwap": VWAP(window_length=10)
      4     }
      5 )
----> 7 result = run_pipeline(pipeline, start_date='2007-01-04', end_date='2007-01-04')

File /opt/conda/lib/python3.11/site-packages/zipline/research/pipeline.py:95, in run_pipeline(pipeline, start_date, end_date, bundle)
     36 def run_pipeline(
     37     pipeline: Pipeline,
     38     start_date: str,
     39     end_date: str = None,
     40     bundle: str = None
     41 ) -> pd.DataFrame:
     42     """
     43     Compute values for pipeline from start_date to end_date, using the specified
     44     bundle or the default bundle.
   (...)
     93     factor = run_pipeline(pipeline, '2018-01-01', '2019-02-01', bundle="usstock-1min")
     94     """
---> 95     return _run_pipeline(
     96         pipeline,
     97         start_date=start_date,
     98         end_date=end_date,
     99         bundle=bundle)

File /opt/conda/lib/python3.11/site-packages/zipline/research/pipeline.py:251, in _run_pipeline(pipeline, start_date, end_date, bundle, mask)
    248 if use_chunks:
    249     # Run in 1-years chunks to reduce memory usage
    250     chunksize = 252
--> 251     results = engine.run_chunked_pipeline(pipeline, start_date, end_date, chunksize=chunksize)
    252 else:
    253     results = engine.run_pipeline(pipeline, start_date, end_date)

File /opt/conda/lib/python3.11/site-packages/zipline/pipeline/engine.py:350, in SimplePipelineEngine.run_chunked_pipeline(self, pipeline, start_date, end_date, chunksize, hooks)
    348 run_pipeline = partial(self._run_pipeline_impl, pipeline, hooks=hooks)
    349 with hooks.running_pipeline(pipeline, start_date, end_date):
--> 350     chunks = [run_pipeline(s, e) for s, e in ranges]
    352 if len(chunks) == 1:
    353     # OPTIMIZATION: Don't make an extra copy in `categorical_df_concat`
    354     # if we don't have to.
    355     return chunks[0]

File /opt/conda/lib/python3.11/site-packages/zipline/pipeline/engine.py:350, in <listcomp>(.0)
    348 run_pipeline = partial(self._run_pipeline_impl, pipeline, hooks=hooks)
    349 with hooks.running_pipeline(pipeline, start_date, end_date):
--> 350     chunks = [run_pipeline(s, e) for s, e in ranges]
    352 if len(chunks) == 1:
    353     # OPTIMIZATION: Don't make an extra copy in `categorical_df_concat`
    354     # if we don't have to.
    355     return chunks[0]

File /opt/conda/lib/python3.11/site-packages/zipline/pipeline/engine.py:440, in SimplePipelineEngine._run_pipeline_impl(self, pipeline, start_date, end_date, hooks)
    434 execution_order = plan.execution_order(workspace, refcounts)
    436 with hooks.computing_chunk(execution_order,
    437                            start_date,
    438                            end_date):
--> 440     results = self.compute_chunk(
    441         graph=plan,
    442         dates=dates,
    443         sids=sids,
    444         workspace=workspace,
    445         refcounts=refcounts,
    446         execution_order=execution_order,
    447         hooks=hooks,
    448     )
    450 return self._to_narrow(
    451     plan.outputs,
    452     results,
   (...)
    455     sids,
    456 )

File /opt/conda/lib/python3.11/site-packages/zipline/pipeline/engine.py:777, in SimplePipelineEngine.compute_chunk(self, graph, dates, sids, workspace, refcounts, execution_order, hooks)
    767     extra_rows = graph.extra_rows[term]
    768     msg = (
    769         f"the pipeline definition requires {term} data on {str(e)} but no bundle data is "
    770         "available on that date; the cause of this issue is that another pipeline term needs "
   (...)
    775         f"the problem:\n\n{repr(graph)}"
    776     )
--> 777     raise NoDataOnDate(msg)
    778 assert set(loaded) == set(to_load), (
    779     'loader did not return an AdjustedArray for each column\n'
    780     'expected: %r\n'
   (...)
    784     )
    785 )
    786 workspace.update(loaded)

NoDataOnDate: the pipeline definition requires EquityPricing<US>.close::float64 data on 2006-12-18 00:00:00 but no bundle data is available on that date; the cause of this issue is that another pipeline term needs EquityPricing<US>.close::float64 and has a window_length of 10, which necessitates loading 9 extra rows of EquityPricing<US>.close::float64; try setting a later start date so that the maximum window_length of any term doesn't extend further back than the bundle start date. Review the pipeline dependencies below to help determine which terms are causing the problem:

{'dependencies': [{'term': EquityPricing<US>.close::float64,
                   'used_by': VWAP([EquityPricing.close, EquityPricing.volume], 10)},
                  {'term': EquityPricing<US>.volume::float64,
                   'used_by': VWAP([EquityPricing.close, EquityPricing.volume], 10)}],
 'nodes': [{'extra_rows': 9, 'needed_for': EquityPricing<US>.close::float64},
           {'extra_rows': 9, 'needed_for': EquityPricing<US>.volume::float64}]}
The error message indicates that we would need data back to 2006-12-18 in order to calculate a 10-day VWAP and produce pipeline output on 2007-01-04 (window_length is measured in trading days, not calendar days). The solution is to set a later start date so that the VWAP factor doesn't require data prior to the bundle start date of 2007-01-03. In this example, the earliest possible start_date turns out to be 2007-01-18, the 10th trading day after the bundle start date of 2007-01-03: a 10-day window needs 10 prior sessions of data.
result = run_pipeline(pipeline, start_date='2007-01-18', end_date='2007-01-18')
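Rather than counting sessions by hand, we can compute the earliest valid start date from the trading calendar. The following is a minimal sketch using the exchange_calendars package, assuming the bundle follows the NYSE (XNYS) calendar; earliest_valid_start is a hypothetical helper, not part of zipline:

import exchange_calendars as xcals

cal = xcals.get_calendar("XNYS")  # assumption: the bundle uses the NYSE calendar

def earliest_valid_start(bundle_start, window_length):
    # Output on date D requires window_length sessions of data ending the
    # session before D, so step forward window_length sessions from the
    # bundle's first session.
    session = cal.date_to_session(bundle_start, direction="next")
    for _ in range(window_length):
        session = cal.next_session(session)
    return session

earliest_valid_start("2007-01-03", 10)  # Timestamp('2007-01-18 00:00:00')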
Next Lesson: Combining Factors