How to load data to work with Multi-backtesting_ml

cyan.gloom

Hello !
I'd like to know how to load two kinds of data, such as 'stocks' and 'index', in order to work with Multi-backtesting_ml.

def load_data(period):
    """Load the stock data and the index data, passing `period` as `tail`.

    Returns the two loaded datasets as a ``(stocks, index)`` tuple.
    """
    ndx_stocks = qndata.stocks.load_ndx_data(tail=period)
    return ndx_stocks, qndata.index.load_data(tail=period)

weights = qnbt.backtest_ml(
    load_data=load_data,
    train=train_model,
    predict=predict_weights,
    # length of the training data, in calendar days
    train_period=15 * 365,
    # interval between model retrainings, in calendar days
    retrain_interval=1 * 365,
    # retraining interval after submission, during evaluation (calendar days)
    retrain_interval_after_submit=1,
    # Whether prediction has to be called for every day during backtesting.
    # Set it to True if you suspect that get_features is looking forward.
    predict_each_day=False,
    # competition type
    competition_type="stocks_nasdaq100",
    # calendar days of history the predict function needs to generate the output
    lookback_period=365,
    # backtest start date
    start_date="2006-01-01",
    analyze=True,
    # whether the chart should be built
    build_plots=True,
)

What should I do ?

Best regards,

Vyacheslav_B

Hello.

Here's an example of the load_data and window functions.

import qnt.data as qndata

def load_data(period):
    """Load the index data and two stocks, bundled in a dict keyed by name.

    Returns a tuple ``(datasets, time_values)``: ``datasets`` maps
    "index_data" and "stocks" to the loaded data, and ``time_values`` holds
    the values of the time coordinate of the stock data.
    """
    datasets = dict()
    datasets["index_data"] = qndata.index.load_data(tail=period)
    # A further dataset could be loaded in the same way, e.g.:
    #     futures = qndata.futures.load_data(tail=period, assets=["F_DX"]).isel(asset=0)
    datasets["stocks"] = qndata.stocks.load_ndx_data(
        tail=period,
        assets=["NAS:AAPL", "NAS:AMZN"],
    )
    return datasets, datasets["stocks"].time.values



def window(data, max_date: np.datetime64, lookback_period: int):
    """Cut every dataset in `data` down to the lookback window ending at `max_date`.

    Args:
        data: dict mapping a dataset name (e.g. "index_data", "stocks") to an
            object that supports ``.sel(time=slice(...))``.
        max_date: upper bound of the time slice.
        lookback_period: window length in days.

    Returns:
        A new dict with the same keys as `data`; each value is restricted to
        the time slice from ``max_date - lookback_period`` days to `max_date`.
    """
    min_date = max_date - np.timedelta64(lookback_period, "D")
    time_range = slice(min_date, max_date)
    # Iterate over the dict instead of hard-coding the keys, so a dataset that
    # is added to the dict is windowed here without any further change.
    return {name: dataset.sel(time=time_range) for name, dataset in data.items()}

weights = qnbt.backtest_ml(
    load_data=load_data,
    window=window,
    # ... followed by the remaining backtest_ml arguments
    # (train, predict, train_period, retrain_interval, etc.);
    # a bare `...` after keyword arguments would be a SyntaxError.
)

Inside the functions, access each dataset by its key: for example,
use data["stocks"] instead of data.

You can check out an example of Machine Learning - predicting BTC futures using IMF Commodity Data at
https://github.com/quantiacs/strategy-ml-predict-BTC-use-IMF/blob/master/strategy.ipynb

If you modify the Machine Learning with a Voting Classifier example at
https://github.com/quantiacs/strategy-ml-voting-crypto/blob/master/strategy.ipynb

The use of functions will look like this.


import qnt.data as qndata
import qnt.ta as qnta

def load_data(period):
    """Load the index data and two stocks and return them with the time values.

    The first element of the returned tuple is a dict with the keys
    "index_data" and "stocks"; the second element holds the values of the
    time coordinate of the stock data.
    """
    loaded_index = qndata.index.load_data(tail=period)
    # A further dataset could be loaded in the same way, e.g.:
    #     futures = qndata.futures.load_data(tail=period, assets=["F_DX"]).isel(asset=0)
    loaded_stocks = qndata.stocks.load_ndx_data(
        tail=period,
        assets=["NAS:AAPL", "NAS:AMZN"],
    )
    bundle = {"index_data": loaded_index, "stocks": loaded_stocks}
    return bundle, loaded_stocks.time.values



def window(data, max_date: np.datetime64, lookback_period: int):
    """Cut every dataset in `data` down to the lookback window ending at `max_date`.

    Args:
        data: dict mapping a dataset name (e.g. "index_data", "stocks") to an
            object that supports ``.sel(time=slice(...))``.
        max_date: upper bound of the time slice.
        lookback_period: window length in days.

    Returns:
        A new dict with the same keys as `data`; each value is restricted to
        the time slice from ``max_date - lookback_period`` days to `max_date`.
    """
    min_date = max_date - np.timedelta64(lookback_period, "D")
    time_range = slice(min_date, max_date)
    # Iterate over the dict instead of hard-coding the keys, so a dataset that
    # is added to the dict is windowed here without any further change.
    return {name: dataset.sel(time=time_range) for name, dataset in data.items()}

def create_and_train_models(data):
    """Create and train the models working on an asset-by-asset basis.

    Args:
        data: dict of datasets; only ``data["stocks"]`` is used here.

    Returns:
        dict mapping an asset name to its fitted model. Assets with fewer
        than 10 aligned training points, or whose training raised an
        exception, are left out.
    """
    stocks = data["stocks"]
    asset_name_all = stocks.coords['asset'].values

    # cut the noisy data head before 2013-05-01
    data_slice = stocks.sel(time=slice('2013-05-01', None))

    features_all = get_features(data_slice)
    target_all = get_target_classes(data_slice)

    models = dict()

    for asset_name in asset_name_all:
        # drop missing values (`how` is passed by keyword: newer xarray
        # versions accept it only as a keyword argument):
        target_cur = target_all.sel(asset=asset_name).dropna('time', how='any')
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')

        # align features and targets:
        target_for_learn_df, feature_for_learn_df = xr.align(target_cur, features_cur, join='inner')

        # Count the aligned samples, i.e. what is actually passed to fit():
        # the inner join can leave fewer points than features_cur has.
        if len(feature_for_learn_df.time) < 10:
            # not enough points for training
            continue

        model = create_model()

        try:
            model.fit(feature_for_learn_df.values, target_for_learn_df)
        except Exception:
            # Best effort: a failure for one asset must not stop the others.
            # KeyboardInterrupt is not an Exception subclass, so it still
            # propagates without an explicit re-raise.
            logging.exception('model training failed')
        else:
            models[asset_name] = model

    return models

def predict(models, data):
    """Predict per-asset weights and multiply them by an SMA filter on SPX.

    Args:
        models: dict mapping an asset name to a fitted model exposing
            ``predict``.
        data: dict with the "index_data" and "stocks" datasets.

    Returns:
        The model predictions for the stocks multiplied by the index filter.
        The filter is 1 where ``qnta.sma(SPX, 20) < qnta.sma(SPX, 40)`` and 0
        elsewhere. Assets without a model, or whose prediction failed, keep
        the initial zero weight.
    """
    stocks = data["stocks"]

    close_index = data["index_data"].sel(asset="SPX")
    sma20 = qnta.sma(close_index, 20)
    sma40 = qnta.sma(close_index, 40)
    weights_index = xr.where(sma20 < sma40, 1, 0)

    weights = xr.zeros_like(stocks.sel(field='close'))

    # The features are computed from the whole stocks dataset and do not
    # depend on the asset being processed, so they are computed once (on
    # first use) instead of once per asset.
    features_all = None

    for asset_name in stocks.coords['asset'].values:
        if asset_name not in models:
            continue

        if features_all is None:
            features_all = get_features(stocks)

        # `how` is passed by keyword: newer xarray versions accept it only
        # as a keyword argument.
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')
        if len(features_cur.time) < 1:
            continue

        try:
            weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = (
                models[asset_name].predict(features_cur.values)
            )
        except Exception:
            # Best effort: a failure for one asset must not stop the others.
            # KeyboardInterrupt is not an Exception subclass, so it still
            # propagates without an explicit re-raise.
            logging.exception('model prediction failed')

    return weights * weights_index



weights = qnbt.backtest_ml(
    load_data=load_data,
    window=window,
    train=create_and_train_models,
    predict=predict,
    # length of the training data, in calendar days
    train_period=10 * 365,
    # interval between model retrainings, in calendar days
    retrain_interval=10 * 365,
    # retraining interval after submission, during evaluation (calendar days)
    retrain_interval_after_submit=1,
    # Whether prediction has to be called for every day during backtesting.
    # Set it to True if you suspect that get_features is looking forward.
    predict_each_day=False,
    # competition type
    competition_type='stocks_nasdaq100',
    # calendar days of history the predict function needs to generate the output
    lookback_period=365,
    # backtest start date
    start_date='2020-01-01',
    # whether the chart should be built
    build_plots=True,
)

cyan.gloom

@vyacheslav_b

Thanks for your help !

I get the error shown below.
Screenshot (23).png

What is 'state'?
What should I pass as the positional argument?

Best regards,

Vyacheslav_B

@cyan-gloom

Hello. The provided code is insufficient to understand the problem.

I assume that a certain function might not be returning the required value (for instance, the function where your model is being created).

I recommend that you check all return values of functions, using tools like display or print. Then, compare them with what is returned in properly working examples.

The state allows you to use data from previous iterations. You can find an example here:
https://github.com/quantiacs/toolbox/blob/2f4c42e33c7ce789dfad5d170444fd542e28c8ae/qnt/examples/004-strategy-futures-multipass-stateful.py