Now that the securities master database contains records for the historical contracts, we can import historical futures data using the general guidelines outlined in the Custom Data section of the Usage Guide. If you haven't read that section, it is recommended to do so now.
Depending on the volume of data you are importing, you may need to import the data into multiple smaller databases instead of one large one. For best performance, try to limit each database to a few GB of data. If you are importing end-of-day data, creating one database per exchange is likely appropriate. If you are importing intraday data, one database per root symbol (or per small group of root symbols) is more appropriate.
This notebook loads end-of-day simulated sample data for ES as an example but is designed to make it easy to modify the code for more complicated scenarios. The example data is in CME_sample_data.csv.
First, we create a custom database for the historical data. To facilitate ingestion into a Zipline bundle later, the database column names should be Open, High, Low, Close, and Volume. The field names in the sample CSV are different (they are all lowercase), but we will rename them later. Additional columns (like OpenInterest in this example) are optional depending on what's in your source data.
from quantrocket.history import create_custom_db
DB_NAME = "imported-futures-1d" # store for later in notebook
create_custom_db(DB_NAME, bar_size="1d", columns={
    "Open": "float",
    "High": "float",
    "Low": "float",
    "Close": "float",
    "Volume": "int",
    "OpenInterest": "int"
})
{'status': 'successfully created quantrocket.v2.history.imported-futures-1d.sqlite'}
Next, upload your historical futures data into the historical_data subdirectory of this directory. This can be done through the JupyterLab interface for small amounts of data, or you can use docker cp from your host machine as shown below:
$ docker cp host/path/to/historical_data/. quantrocket-jupyter-1:/codeload/futures_import/historical_data/
See the Usage Guide for additional notes about docker cp.
Next, we need to map the symbols in your historical data to QuantRocket SIDs. This is typically the most cumbersome step in the import process, and the exact steps will vary depending on the format of your data.
The goal of this step is to generate a Python dictionary mapping symbols in the source data to QuantRocket SIDs. In a subsequent step, we will use this dictionary to append sids to the historical source data as we import it into the custom database created above.
If you have a master list of individual contract specifications from your data provider, you can use it to create the mapping. In this example, we don't have a master list and will extract the futures symbols from the price data files.
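As a minimal standalone sketch of what "extracting the futures symbols from the price data files" means, the snippet below collects the unique symbols from a price file using Python's csv module. The file content here is made up for illustration; the notebook itself uses pandas for this step.

```python
import csv
import io

# made-up CSV content standing in for one of the historical_data files
sample = io.StringIO(
    "date,symbol,open,high,low,close,volume\n"
    "2008-09-18,ESU08,1163.75,1213.25,1133.00,1198.25,6013113\n"
    "2008-09-18,ESZ08,1160.00,1210.00,1130.00,1195.00,1000000\n"
    "2008-09-17,ESU08,1215.50,1227.75,1154.50,1160.75,5915836\n"
)

# collect the unique symbols, preserving first-seen order
symbols = list(dict.fromkeys(row["symbol"] for row in csv.DictReader(sample)))
print(symbols)  # prints ['ESU08', 'ESZ08']
```

Each unique symbol found this way becomes a key (together with its MIC) in the mapping dictionary built below.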
First, we initialize the mapping dictionary. The keys will be tuples of (MIC, symbol) and the values will be sids. Once we've populated the dictionary, it will look something like this:
{
    ('XCME', 'ESU08'): 'QF000000654947',
    ('XCME', 'ESZ08'): 'QF000000654948',
    ...
}
# map of (MIC, symbol): sid
mic_and_symbol_to_sid = {
}
Next, we interactively match the symbols in the source data to contracts in the securities master database.
Two utility files will help with the mapping process. The codeload.futures_import.utils module provides a MONTH_CODES_TO_MONTH_NUMS dictionary that maps futures month codes to the corresponding month numbers; for example, U is mapped to 9 (September). The exchanges_to_mics.yml YAML file contains a dictionary mapping common exchange codes to the corresponding MIC (for example, NYMEX -> XNYM). If any of the exchange codes used by your data provider are not in this YAML file, you should add them. (The DataMapper class, below, will raise an exception to remind you if this is the case.)
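If you need to add entries to exchanges_to_mics.yml, they take the form of exchange-code-to-MIC pairs. A hypothetical fragment is shown below; these particular entries are illustrative, so verify them against the actual file and the ISO 10383 MIC registry:

```yaml
# exchange code used by the data provider -> ISO 10383 MIC
CME: XCME
CBOT: XCBT
NYMEX: XNYM
COMEX: XCEC
```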
from codeload.futures_import.utils import MONTH_CODES_TO_MONTH_NUMS
import yaml
with open("contract_chains/exchanges_to_mics.yml") as f:
    EXCHANGES_TO_MICS = yaml.safe_load(f)
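For reference, the standard futures month codes run F (January) through Z (December). The imported MONTH_CODES_TO_MONTH_NUMS dictionary presumably contains an equivalent mapping; the sketch below is for illustration only:

```python
# Standard futures month codes (F=Jan ... Z=Dec)
MONTH_CODES = {
    "F": 1, "G": 2, "H": 3, "J": 4,
    "K": 5, "M": 6, "N": 7, "Q": 8,
    "U": 9, "V": 10, "X": 11, "Z": 12,
}

print(MONTH_CODES["U"])  # prints 9 (September)
```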
The following DataMapper class can be used as a template for performing the interactive mapping. You would need to modify this class based on the format of your data.
A description of the class's methods follows:
run()
This is the main entry point. It does the following:
- calls load_source_files(), which returns a tuple of the MIC and DataFrame contents of each source file
- calls load_contracts_for_mic(), which loads contracts from the securities master database for the current MIC
- calls map_symbol() for each symbol. The map_symbol() method chooses the matching contract from the securities master database and associates its SID with the symbol.
You may not need to modify this method.
load_source_files()
This method iterates through the source files and, for each one, yields the MIC and the DataFrame content from the source file. In this example, the exchange is extracted from the filename and the corresponding MIC is looked up in EXCHANGES_TO_MICS. You will likely need to modify this method to conform to your source data.
load_contracts_for_mic()
This method loads contracts from the securities master database for the specified MIC. You shouldn't need to modify this method.
map_symbol()
This method parses the symbol into a root symbol and contract month and selects the corresponding record from the contracts DataFrame that was loaded from the securities master database. You will likely need to modify this method to conform to your source data.
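As a standalone illustration of the parsing that map_symbol performs, the following sketch converts a symbol like ESU08 into a root symbol and an ibkr_ContractMonth-style value. The parse_symbol helper is hypothetical (not part of the codeload module), and the month-code mapping is redefined locally so the sketch is self-contained:

```python
# standard futures month codes, redefined here for self-containment
MONTH_CODES_TO_MONTH_NUMS = {
    "F": 1, "G": 2, "H": 3, "J": 4, "K": 5, "M": 6,
    "N": 7, "Q": 8, "U": 9, "V": 10, "X": 11, "Z": 12,
}

def parse_symbol(symbol):
    """Split a symbol like 'ESU08' into (root_symbol, contract_month)."""
    root_symbol = symbol[:-3]     # everything but the last 3 chars, e.g. 'ES'
    month_code = symbol[-3]       # 3rd-to-last char, e.g. 'U'
    month = str(MONTH_CODES_TO_MONTH_NUMS[month_code]).zfill(2)
    two_digit_year = symbol[-2:]  # last 2 chars, e.g. '08'
    # infer the century: two-digit years below 50 are assumed to be 20xx
    year = ("20" if int(two_digit_year) < 50 else "19") + two_digit_year
    return root_symbol, int(year + month)

print(parse_symbol("ESU08"))  # prints ('ES', 200809)
```

The DataMapper class below applies this same logic inline, then matches the result against the ibkr_TradingClass and ibkr_ContractMonth fields.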
Note the following potential gotchas when importing futures data:
- For many contracts, the last trade date (ibkr_LastTradeDate) occurs in the month preceding the contract month (ibkr_ContractMonth). Be aware of this when choosing which field to match on. When the contract symbol includes a month code and year (e.g. ESU08), it generally should be matched to ibkr_ContractMonth, not the month and year of ibkr_LastTradeDate. This is what the DataMapper class does.
- ibkr_Symbol is the root symbol as stored in IBKR's system, which sometimes differs from the root symbol used by the exchange. The root symbol as used by the exchange can be found in the ibkr_TradingClass field. For this reason, it's generally better to match on ibkr_TradingClass rather than ibkr_Symbol. This is what the DataMapper class does.
import os
import glob
import pandas as pd
from IPython.display import clear_output
from quantrocket.master import get_securities
# If a root symbol in the source file doesn't match the ibkr_TradingClass in
# the securities master database, you can manually add the mapping here
ROOT_SYMBOL_TO_IBKR_TRADING_CLASS = {
    # data file root symbol: securities master ibkr_TradingClass
    # for example:
    # "E6": "6E",
}
SYMBOL_FIELDNAME = "symbol" # the name of the field in the CSV files that contains the symbol
class DataMapper:

    def run(self):
        """
        This is the main method that calls all the other methods.
        """
        for mic, data in self.load_source_files():
            contracts = self.load_contracts_for_mic(mic)
            symbols = data[SYMBOL_FIELDNAME].unique()
            for symbol in symbols:
                # Skip if already mapped
                if (mic, symbol) in mic_and_symbol_to_sid:
                    continue
                self.map_symbol(mic, symbol, contracts)

    def load_source_files(self):
        """
        Load source files and return a generator of tuples of
        the form (mic, data).

        Returns
        -------
        tuple of str and DataFrame
            The MIC and DataFrame content of each source file.
        """
        # Get a list of all CSV files in the historical_data directory
        filepaths = glob.glob("historical_data/*.csv")

        for filepath in filepaths:
            filename = os.path.basename(filepath)
            # In the example files, the exchange is the first part of
            # the filename; extract it
            exchange = filename.split("_")[0]
            if exchange not in EXCHANGES_TO_MICS:
                raise ValueError(
                    f"Exchange {exchange} not found in EXCHANGES_TO_MICS, please add it")
            mic = EXCHANGES_TO_MICS[exchange]
            # Load the CSV file into a DataFrame
            data = pd.read_csv(filepath)
            yield mic, data

    def load_contracts_for_mic(self, mic):
        """
        Return a DataFrame of contracts from the securities master database
        for the specified MIC.

        Parameters
        ----------
        mic : str
            The MIC to load contracts for.

        Returns
        -------
        DataFrame
            The contracts.
        """
        contracts = get_securities(
            exchanges=mic,
            sec_types="FUT",
            fields=[
                "ibkr_LocalSymbol",   # e.g. ESU8 or ESU08 (year may be 1 digit or 2 digits)
                "ibkr_TradingClass",  # e.g. ES
                "ibkr_ContractMonth", # e.g. 200809
                "ibkr_LastTradeDate", # e.g. 2008-09-16
                "ibkr_LongName",      # e.g. E-mini S&P 500
            ]
        ).sort_values("ibkr_LastTradeDate")
        return contracts

    def map_symbol(self, mic, symbol, contracts):
        """
        Map the symbol from the source files to the QuantRocket SID and
        store the mapping in the mic_and_symbol_to_sid dictionary. Obtain
        user confirmation of the mapping.

        Parameters
        ----------
        mic : str
            The MIC of the symbol.

        symbol : str
            The symbol from the source file. E.g. ESU08.

        contracts : DataFrame
            The contracts from the securities master database for this MIC.

        Returns
        -------
        None
        """
        # Extract the root symbol from the contract symbol (= everything but
        # the last 3 characters)
        root_symbol = symbol[:-3]

        # respect manual mappings
        if root_symbol in ROOT_SYMBOL_TO_IBKR_TRADING_CLASS:
            root_symbol = ROOT_SYMBOL_TO_IBKR_TRADING_CLASS[root_symbol]

        # Extract the month code from the contract symbol (= 3rd-to-last
        # character)
        month_code = symbol[-3]
        # look up the month number from the month code
        month_num = MONTH_CODES_TO_MONTH_NUMS[month_code]
        # pad month_num: 1 -> 01
        month = str(month_num).zfill(2)

        # Extract the 2-digit year from the contract symbol (= last 2
        # characters)
        two_digit_year = symbol[-2:]
        # prefix two_digit_year with century 19 or 20
        if int(two_digit_year) < 50:
            year = "20" + two_digit_year
        else:
            year = "19" + two_digit_year

        contract_month = int(year + month)

        # Match on root symbol and contract month
        matching_contracts = contracts[
            (contracts.ibkr_TradingClass == root_symbol)
            &
            (contracts.ibkr_ContractMonth == contract_month)
        ]

        if matching_contracts.empty:
            raise ValueError(
                f"No matching contracts found for {mic} symbol {symbol}, you can run "
                f"DataMapper().load_contracts_for_mic('{mic}') to see the available contracts "
                "for this MIC")

        # if there is exactly one match, show it and ask for confirmation
        if len(matching_contracts) == 1:
            match = matching_contracts.reset_index().iloc[0]
            print(f"found 1 matching sid for {mic} symbol {symbol}\n")
            print(match.to_string())
            answer = input("assign this sid to this symbol? y/n")
            if answer != "y":
                raise KeyboardInterrupt("You entered something other than y")
            sid = match.Sid
        # if there are multiple matches, show them and ask the user to select
        else:
            print(f"found {len(matching_contracts)} matching sids for {mic} symbol {symbol}\n")
            print(matching_contracts.to_string())
            sid = input("enter the sid to assign to this symbol")
            if sid not in matching_contracts.index:
                raise KeyboardInterrupt(f"{sid} is not one of the matching sids")

        mic_and_symbol_to_sid[(mic, symbol)] = sid

        clear_output()
The DataMapper class can now be run. Since the class doesn't modify any files or databases but simply populates the mic_and_symbol_to_sid dictionary, it is safe to run it repeatedly while working through issues. The run() method will skip any symbols that you have already mapped. To start over completely, re-run the cell that initializes the mic_and_symbol_to_sid dictionary to clear its contents.
DataMapper().run()
The mic_and_symbol_to_sid dictionary should now be populated:
mic_and_symbol_to_sid
{('XCME', 'ESU08'): 'QF000000654947', ('XCME', 'ESZ08'): 'QF000000654948'}
Now that we have created the mapping of symbols to SIDs, we are ready to load the data into the database.
The following DataLoader class provides a template for performing the loading.
A description of the class's methods follows:
run()
This is the main entry point. It does the following:
- calls load_source_files(), which returns the MIC and the DataFrame contents from each source file (DataLoader inherits from DataMapper in order to re-use this method from the parent class)
- adds a Sid column to the DataFrame and assigns the correct SID for each symbol, as defined in the mic_and_symbol_to_sid dictionary from earlier
- calls prepare_data() and, after user confirmation, inserts the prepared records into the custom database
You may not need to modify this method.
prepare_data()
This method renames source columns to match the expected column names in the custom database, parses the date column, and drops unneeded columns. You will likely need to modify this method to conform to your source data.
from quantrocket.db import connect_sqlite, insert_or_replace, list_databases
# look up the custom database path and open a SQLite connection to it
db_path = list_databases(
    services="history",
    codes=DB_NAME,
    detail=True)["sqlite"][0]["path"]

db_conn = connect_sqlite(db_path)
class DataLoader(DataMapper):

    def run(self):
        """
        Load each source file, append the sids from mic_and_symbol_to_sid,
        and insert the data into the custom database.
        """
        for mic, data in self.load_source_files():
            symbols = data[SYMBOL_FIELDNAME].unique()
            data["Sid"] = None
            for symbol in symbols:
                # look up the sid for the symbol and set it in the DataFrame
                sid = mic_and_symbol_to_sid[(mic, symbol)]
                data.loc[data[SYMBOL_FIELDNAME] == symbol, "Sid"] = sid

            # further data prep
            data = self.prepare_data(data)

            # get user confirmation that everything looks okay
            print(data.head().to_string())
            answer = input(f"look okay to insert {len(data)} records? y/n")
            if answer != "y":
                raise KeyboardInterrupt("You entered something other than y")

            insert_or_replace(data, "Price", db_conn)
            clear_output()

    def prepare_data(self, data):
        """
        Perform data prep as discussed in the 'Prepare custom data' section of the
        usage guide: https://www.quantrocket.com/docs/#custom-data-prepare-custom-data

        Specifically:

        - rename columns
        - parse date column
        - drop extraneous columns

        Parameters
        ----------
        data : DataFrame
            The data to prepare.

        Returns
        -------
        DataFrame
            The prepared data.
        """
        # rename columns
        data = data.rename(columns={
            # source file column name: custom database column name
            "date": "Date",
            "open": "Open",
            "high": "High",
            "low": "Low",
            "close": "Close",
            "volume": "Volume",
            "open_interest": "OpenInterest",
        })

        # parse the date column (if intraday, include a timezone as shown in
        # the usage guide)
        data["Date"] = pd.to_datetime(data["Date"])

        # the symbol column is no longer needed
        data = data.drop(SYMBOL_FIELDNAME, axis=1)

        return data
We can now run the DataLoader and insert the data into the database:
DataLoader().run()
        Date     Open     High      Low    Close   Volume  OpenInterest             Sid
0 2008-09-18  1163.75  1213.25  1133.00  1198.25  6013113       2380251  QF000000654947
1 2008-09-17  1215.50  1227.75  1154.50  1160.75  5915836       3490395  QF000000654947
2 2008-09-16  1196.75  1219.00  1161.75  1214.25  6078506       3205598  QF000000654947
3 2008-09-15  1230.25  1237.75  1194.50  1195.00  5191192       3050694  QF000000654947
4 2008-09-12  1251.00  1257.75  1233.50  1257.25  3648404       2809352  QF000000654947
look okay to insert 627 records? y/n y
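As a final sanity check, you can query the Price table through the same SQLite connection (for example with pd.read_sql against db_conn) to confirm the record counts and date ranges per sid. The snippet below sketches that pattern in a self-contained way, using an in-memory SQLite database and made-up rows in place of the custom database:

```python
import sqlite3

# In the notebook you would reuse db_conn; here we sketch the same check
# against an in-memory database with made-up rows
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE Price (Sid TEXT, Date TEXT, Close REAL)")
conn.executemany(
    "INSERT INTO Price VALUES (?, ?, ?)",
    [
        ("QF000000654947", "2008-09-18", 1198.25),
        ("QF000000654947", "2008-09-17", 1160.75),
    ])

# count records and the covered date range per sid to confirm the insert
rows = conn.execute(
    "SELECT Sid, COUNT(*), MIN(Date), MAX(Date) FROM Price GROUP BY Sid"
).fetchall()
print(rows)  # prints [('QF000000654947', 2, '2008-09-17', '2008-09-18')]
```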