Hello.
Here's an example of the load_data and window functions.
import qnt.data as qndata
def load_data(period):
    """Load the index data and the selected stocks for the backtester.

    `period` is forwarded as `tail` to both qndata loaders.

    Returns a 2-tuple: a dict with the two datasets stored under the keys
    "index_data" and "stocks", and the `time` values of the stock data.
    """
    index_prices = qndata.index.load_data(tail=period)
    # Futures could be loaded here in the same way, for example:
    # futures = qndata.futures.load_data(tail=period, assets=["F_DX"]).isel(asset=0)
    tickers = ["NAS:AAPL", "NAS:AMZN"]
    stock_prices = qndata.stocks.load_ndx_data(tail=period, assets=tickers)
    datasets = {"index_data": index_prices, "stocks": stock_prices}
    return datasets, stock_prices.time.values
def window(data, max_date: np.datetime64, lookback_period: int):
    """Restrict both datasets to the lookback window ending at `max_date`.

    The window starts `lookback_period` days before `max_date`. The result
    is a new dict with the same two keys as `data` ("index_data" and
    "stocks"), each holding the time-sliced dataset.
    """
    span = slice(max_date - np.timedelta64(lookback_period, "D"), max_date)
    return {key: data[key].sel(time=span) for key in ("index_data", "stocks")}
# Pass the custom loader and slicer to the ML backtester. Both work on the
# dict of datasets ("index_data" and "stocks") instead of a single dataset.
weights = qnbt.backtest_ml(
    load_data=load_data,
    window=window,
    # ... the remaining arguments (train, predict, etc.) go here
)
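Inside the train and predict functions, access the stock data as data["stocks"]
(and the index data as data["index_data"]) instead of using data directly, because data is now a dictionary.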
You can check out an example of Machine Learning - predicting BTC futures using IMF Commodity Data at
https://github.com/quantiacs/strategy-ml-predict-BTC-use-IMF/blob/master/strategy.ipynb
If you modify the Machine Learning with a Voting Classifier example at
https://github.com/quantiacs/strategy-ml-voting-crypto/blob/master/strategy.ipynb
the functions will look like this:
import qnt.data as qndata
import qnt.ta as qnta
def load_data(period):
    """Fetch the datasets used by the strategy.

    Both loaders receive `period` as their `tail` argument.

    Returns:
        A pair ``(datasets, times)`` where ``datasets`` is a dict with the
        keys "index_data" and "stocks", and ``times`` is the array of
        `time` values taken from the stock data.
    """
    datasets = {}
    datasets["index_data"] = qndata.index.load_data(tail=period)
    # Optional: futures can be loaded alongside, e.g.
    # futures = qndata.futures.load_data(tail=period, assets=["F_DX"]).isel(asset=0)
    datasets["stocks"] = qndata.stocks.load_ndx_data(
        tail=period,
        assets=["NAS:AAPL", "NAS:AMZN"],
    )
    return datasets, datasets["stocks"].time.values
def window(data, max_date: np.datetime64, lookback_period: int):
    """Slice every dataset in `data` to the requested time window.

    The window runs from `max_date` minus `lookback_period` days up to
    `max_date`. Returns a new dict with the keys "index_data" and "stocks".
    """
    earliest = max_date - np.timedelta64(lookback_period, "D")
    time_range = slice(earliest, max_date)

    sliced = {}
    for name in ("index_data", "stocks"):
        sliced[name] = data[name].sel(time=time_range)
    return sliced
def create_and_train_models(data):
    """Create and train the models working on an asset-by-asset basis.

    Args:
        data: dict of datasets; only ``data["stocks"]`` is used here.

    Returns:
        dict mapping each asset name to its fitted model. An asset is left
        out when it has fewer than 10 aligned training points or when
        fitting its model raised an exception (the failure is logged).
    """
    stocks = data["stocks"]
    asset_name_all = stocks.coords['asset'].values

    # cut the noisy data head before 2013-05-01
    data_slice = stocks.sel(time=slice('2013-05-01', None))

    features_all = get_features(data_slice)
    target_all = get_target_classes(data_slice)

    models = {}

    for asset_name in asset_name_all:
        # drop missing values (`how` is passed by keyword: newer xarray
        # releases no longer accept it positionally):
        target_cur = target_all.sel(asset=asset_name).dropna('time', how='any')
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')

        # align features and targets:
        target_for_learn_df, feature_for_learn_df = xr.align(target_cur, features_cur, join='inner')

        # Check the aligned set, because that is what the model is fitted on;
        # the inner join can leave fewer points than `features_cur` has.
        if len(feature_for_learn_df.time) < 10:
            # not enough points for training
            continue

        model = create_model()

        try:
            model.fit(feature_for_learn_df.values, target_for_learn_df)
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses,
            # so they still propagate and stop the run.
            logging.exception('model training failed')
        else:
            models[asset_name] = model

    return models
def predict(models, data):
    """Generate output weights from the per-asset models and an index filter.

    Args:
        models: dict mapping asset name to a fitted model, as returned by
            ``create_and_train_models``.
        data: dict of datasets with the keys "index_data" and "stocks".

    Returns:
        The per-asset model predictions multiplied by an index filter. The
        filter is 1 where ``qnta.sma`` of the "SPX" index over 20 is below
        the one over 40, and 0 elsewhere. Assets without a model, without
        any complete feature row, or whose prediction failed keep weight 0.
    """
    stocks = data["stocks"]
    close_index = data["index_data"].sel(asset="SPX")
    close_stocks = stocks.sel(field="close")

    sma20 = qnta.sma(close_index, 20)
    sma40 = qnta.sma(close_index, 40)
    weights_index = xr.where(sma20 < sma40, 1, 0)

    weights = xr.zeros_like(close_stocks)

    # The features do not depend on the asset being processed, so compute
    # them once instead of once per asset inside the loop.
    features_all = get_features(stocks)

    for asset_name in stocks.coords['asset'].values:
        if asset_name not in models:
            continue
        model = models[asset_name]

        # `how` is passed by keyword: newer xarray releases no longer accept
        # it positionally.
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')
        if len(features_cur.time) < 1:
            continue

        try:
            weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = model.predict(features_cur.values)
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses,
            # so they still propagate and stop the run.
            logging.exception('model prediction failed')

    return weights * weights_index
# Run the ML backtest with the custom loader/slicer and the train/predict pair.
weights = qnbt.backtest_ml(
    load_data=load_data,
    window=window,
    train=create_and_train_models,
    predict=predict,
    # length of the training data, in calendar days
    train_period=10 * 365,
    # interval between model retrainings, in calendar days
    retrain_interval=10 * 365,
    # retraining interval after submission, during evaluation (calendar days)
    retrain_interval_after_submit=1,
    # Whether prediction must be called for every day during backtesting.
    # Set it to True if you suspect that get_features is looking forward.
    predict_each_day=False,
    # competition type
    competition_type='stocks_nasdaq100',
    # calendar days of history the predict function needs to produce its output
    lookback_period=365,
    # first date of the backtest
    start_date='2020-01-01',
    # whether to draw the chart
    build_plots=True,
)