How to load data to work with Multi-backtesting_ml
-
Hello !
I'd like to know how to load two kinds of data, such as 'stocks' and 'index', in order to work with Multi-backtesting_ml.

def load_data(period):
    stocks = qndata.stocks.load_ndx_data(tail=period)
    index = qndata.index.load_data(tail=period)
    return stocks, index

weights = qnbt.backtest_ml(
    load_data = load_data,
    train = train_model,
    predict = predict_weights,
    train_period = 15 *365, # the data length for training in calendar days
    retrain_interval = 1 *365, # how often we have to retrain models (calendar days)
    retrain_interval_after_submit = 1, # how often retrain models after submission during evaluation (calendar days)
    predict_each_day = False, # Is it necessary to call prediction for every day during backtesting?
                              # Set it to True if you suspect that get_features is looking forward.
    competition_type = "stocks_nasdaq100", # competition type
    lookback_period = 365, # how many calendar days are needed by the predict function to generate the output
    start_date = "2006-01-01", # backtest start date
    analyze = True,
    build_plots = True # do you need the chart?
)
What should I do ?
Best regards,
-
Hello.
Here's an example of the load_data and window functions.
import numpy as np
import qnt.backtester as qnbt
import qnt.data as qndata


def load_data(period):
    """Load the index and stock data sets and bundle them into one dict.

    Args:
        period: forwarded as ``tail`` to both qnt loaders.

    Returns:
        A ``(data, time_values)`` tuple. ``data`` is a dict with the keys
        ``"index_data"`` and ``"stocks"``; ``time_values`` is
        ``stocks.time.values``.
    """
    index_data = qndata.index.load_data(tail=period)
    # Other data sets can be added to the dict the same way, for example:
    # futures = qndata.futures.load_data(tail=period, assets=["F_DX"]).isel(asset=0)
    stocks = qndata.stocks.load_ndx_data(tail=period, assets=["NAS:AAPL", "NAS:AMZN"])
    # NOTE(review): the second element is presumably the timeline the
    # backtester iterates over, since a plain dict has no time coordinate of
    # its own - confirm against the qnt backtester documentation.
    return {"index_data": index_data, "stocks": stocks}, stocks.time.values


def window(data, max_date: np.datetime64, lookback_period: int):
    """Restrict every data set to the ``lookback_period`` days ending at ``max_date``.

    Args:
        data: the dict built by ``load_data`` (keys ``"index_data"`` and
            ``"stocks"``).
        max_date: upper bound of the time slice.
        lookback_period: length of the slice in days.

    Returns:
        A new dict with the same keys, each entry sliced along ``time``.
    """
    min_date = max_date - np.timedelta64(lookback_period, "D")
    time_range = slice(min_date, max_date)
    return {
        "index_data": data["index_data"].sel(time=time_range),
        "stocks": data["stocks"].sel(time=time_range),
    }


weights = qnbt.backtest_ml(
    load_data=load_data,
    window=window,
    # ... pass the remaining backtest_ml arguments (train, predict, etc.) here
)
Inside the functions, to access the data, use
data["stocks"] instead of data.
You can check out an example of Machine Learning - predicting BTC futures using IMF Commodity Data at
https://github.com/quantiacs/strategy-ml-predict-BTC-use-IMF/blob/master/strategy.ipynb
If you modify the Machine Learning with a Voting Classifier example at
https://github.com/quantiacs/strategy-ml-voting-crypto/blob/master/strategy.ipynb
the use of functions will look like this.
# NOTE(review): this snippet is meant to replace the matching cells of the
# voting-classifier example notebook. The names np, xr, logging, qnbt,
# get_features, get_target_classes and create_model are not defined here and
# are presumably provided by that notebook - confirm before running standalone.
import qnt.data as qndata
import qnt.ta as qnta


def load_data(period):
    """Load the index and stock data sets and bundle them into one dict.

    Args:
        period: forwarded as ``tail`` to both qnt loaders.

    Returns:
        A ``(data, time_values)`` tuple. ``data`` is a dict with the keys
        ``"index_data"`` and ``"stocks"``; ``time_values`` is
        ``stocks.time.values``.
    """
    index_data = qndata.index.load_data(tail=period)
    # futures = qndata.futures.load_data(tail=period, assets=["F_DX"]).isel(asset=0)
    stocks = qndata.stocks.load_ndx_data(tail=period, assets=["NAS:AAPL", "NAS:AMZN"])
    return {"index_data": index_data, "stocks": stocks}, stocks.time.values


def window(data, max_date: np.datetime64, lookback_period: int):
    """Restrict every data set to the ``lookback_period`` days ending at ``max_date``.

    Args:
        data: the dict built by ``load_data`` (keys ``"index_data"`` and
            ``"stocks"``).
        max_date: upper bound of the time slice.
        lookback_period: length of the slice in days.

    Returns:
        A new dict with the same keys, each entry sliced along ``time``.
    """
    min_date = max_date - np.timedelta64(lookback_period, "D")
    time_range = slice(min_date, max_date)
    return {
        "index_data": data["index_data"].sel(time=time_range),
        "stocks": data["stocks"].sel(time=time_range),
    }


def create_and_train_models(data):
    """Create and train the models working on an asset-by-asset basis.

    Only ``data["stocks"]`` is used for training. Assets whose training
    raises are logged and left out of the result.

    Returns:
        A dict mapping asset name to its fitted model.
    """
    stocks = data["stocks"]
    asset_name_all = stocks.coords["asset"].values

    # cut the noisy data head before 2013-05-01
    data_slice = stocks.sel(time=slice("2013-05-01", None))

    features_all = get_features(data_slice)
    target_all = get_target_classes(data_slice)

    models = {}
    for asset_name in asset_name_all:
        # drop missing values:
        target_cur = target_all.sel(asset=asset_name).dropna("time", "any")
        features_cur = features_all.sel(asset=asset_name).dropna("time", "any")

        # align features and targets:
        target_for_learn_df, feature_for_learn_df = xr.align(
            target_cur, features_cur, join="inner"
        )

        if len(features_cur.time) < 10:
            # not enough points for training
            continue

        model = create_model()
        try:
            model.fit(feature_for_learn_df.values, target_for_learn_df)
        except Exception:
            # `except Exception` lets KeyboardInterrupt/SystemExit propagate,
            # so no separate re-raise clause is needed.
            logging.exception("model training failed")
        else:
            models[asset_name] = model

    return models


def predict(models, data):
    """Predict per-asset weights and gate them with an index filter.

    The filter is 1 where the 20-period SMA of the "SPX" index is below its
    40-period SMA and 0 elsewhere; the model outputs are multiplied by it.

    Args:
        models: dict of fitted models keyed by asset name, as returned by
            ``create_and_train_models``.
        data: dict with the keys ``"index_data"`` and ``"stocks"``.

    Returns:
        The weights for ``data["stocks"]`` multiplied by the index filter.
        Assets without a model keep weight 0.
    """
    close_index = data["index_data"].sel(asset="SPX")
    sma20 = qnta.sma(close_index, 20)
    sma40 = qnta.sma(close_index, 40)
    weights_index = xr.where(sma20 < sma40, 1, 0)

    stocks = data["stocks"]
    asset_name_all = stocks.coords["asset"].values
    weights = xr.zeros_like(stocks.sel(field="close"))

    # The features do not depend on the asset being processed, so compute
    # them once instead of once per asset.
    features_all = get_features(stocks)

    for asset_name in asset_name_all:
        if asset_name not in models:
            continue

        model = models[asset_name]
        features_cur = features_all.sel(asset=asset_name).dropna("time", "any")
        if len(features_cur.time) < 1:
            continue

        try:
            weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = (
                model.predict(features_cur.values)
            )
        except Exception:
            # KeyboardInterrupt is not an Exception subclass and still
            # propagates to the caller.
            logging.exception("model prediction failed")

    return weights * weights_index


weights = qnbt.backtest_ml(
    load_data=load_data,
    window=window,
    train=create_and_train_models,
    predict=predict,
    train_period=10 * 365,  # the data length for training in calendar days
    retrain_interval=10 * 365,  # how often we have to retrain models (calendar days)
    retrain_interval_after_submit=1,  # how often retrain models after submission during evaluation (calendar days)
    predict_each_day=False,  # Is it necessary to call prediction for every day during backtesting?
                             # Set it to true if you suspect that get_features is looking forward.
    competition_type='stocks_nasdaq100',  # competition type
    lookback_period=365,  # how many calendar days are needed by the predict function to generate the output
    start_date='2020-01-01',  # backtest start date
    build_plots=True  # do you need the chart?
)
-
Thanks for your help !
I get the error shown below.
What is 'state'?
What should I pass as the positional argument?
Best regards,
-
Hello. The provided code is insufficient to understand the problem.
I assume that a certain function might not be returning the required value (for instance, the function where your model is being created).
I recommend that you check all return values of functions, using tools like display or print. Then, compare them with what is returned in properly working examples.
The state allows you to use data from previous iterations. You can find an example here:
https://github.com/quantiacs/toolbox/blob/2f4c42e33c7ce789dfad5d170444fd542e28c8ae/qnt/examples/004-strategy-futures-multipass-stateful.py