Help me in using Voting Classifier ML on Futures
-
Hi!
This is the default voting classifier strategy for cryptos:
import logging

import numpy as np
import pandas as pd
import xarray as xr

import qnt.backtester as qnbt
import qnt.ta as qnta


def create_model():
    """Construct the ML model used for every asset.

    The model is a VotingClassifier combining 42 RidgeClassifier and
    42 SGDClassifier instances, each seeded from a fixed random stream
    (seed 13) so that the ensemble is reproducible. Using several random
    seeds is intended to reduce overfitting. Swap the estimators or the
    combination logic here to experiment with other models.

    Returns:
        An unfitted sklearn VotingClassifier.
    """
    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import SGDClassifier, RidgeClassifier
    import random

    max_seed = 2 ** 32 - 1
    r = random.Random(13)

    classifiers = []
    for i in range(42):
        # The ridge seed is drawn before the sgd seed in every iteration;
        # keep this order so the seed sequence stays reproducible.
        classifiers.append(
            ('ridge' + str(i), RidgeClassifier(random_state=r.randint(0, max_seed)))
        )
        classifiers.append(
            ('sgd' + str(i), SGDClassifier(random_state=r.randint(0, max_seed)))
        )

    return VotingClassifier(classifiers)


def get_features(data):
    """Build the features used for learning.

    The features are:
        * a trend indicator (1-period rate of change of a 70-period LWMA
          of the close);
        * the stochastic oscillator (the ``d`` line, period 14);
        * volatility (true range relative to close, smoothed by a
          14-period LWMA);
        * volume (ratio of the 5-period SMA to the 60-period SMA, with
          non-finite values replaced by 0).

    Args:
        data: market data with a 'field' dimension providing 'close',
            'high', 'low' and 'vol'.

    Returns:
        The four features concatenated along 'field' and transposed to
        ('time', 'field', 'asset').
    """
    close = data.sel(field='close')
    high = data.sel(field='high')
    low = data.sel(field='low')

    trend = qnta.roc(qnta.lwma(close, 70), 1)

    # stochastic oscillator (only the d line is used as a feature):
    _, d = qnta.stochastic(high, low, close, 14)

    volatility = qnta.tr(high, low, close)
    volatility = volatility / close
    volatility = qnta.lwma(volatility, 14)

    volume = data.sel(field='vol')
    volume = qnta.sma(volume, 5) / qnta.sma(volume, 60)
    # a zero or missing long-term average gives inf/NaN: treat it as 0
    volume = volume.where(np.isfinite(volume), 0)

    # combine the selected four features:
    result = xr.concat(
        [trend, d, volatility, volume],
        pd.Index(
            ['trend', 'stochastic_d', 'volatility', 'volume'],
            name='field'
        )
    )

    return result.transpose('time', 'field', 'asset')


def get_target_classes(data):
    """Build the target classes which will be later predicted.

    Args:
        data: market data with a 'field' dimension providing 'close'.

    Returns:
        1 where the close shifted by -1 (the next close) is greater than
        the current close, 0 otherwise.
    """
    price_current = data.sel(field='close')
    price_future = qnta.shift(price_current, -1)

    class_positive = 1
    class_negative = 0

    # NOTE(review): a comparison with NaN is False, so any bar without a
    # next price (presumably the last one after the shift) is labelled
    # class_negative rather than being dropped -- confirm this is intended.
    target_is_price_up = xr.where(
        price_future > price_current, class_positive, class_negative
    )

    return target_is_price_up


def create_and_train_models(data):
    """Create and train the models working on an asset-by-asset basis.

    Args:
        data: market data with 'time', 'field' and 'asset' dimensions.

    Returns:
        A dict mapping asset name to its fitted model. Assets with fewer
        than 10 usable samples, or whose training raised, are omitted.
    """
    asset_name_all = data.coords['asset'].values

    # cut the noisy data head before 2013-05-01
    data = data.sel(time=slice('2013-05-01', None))

    features_all = get_features(data)
    target_all = get_target_classes(data)

    models = dict()

    for asset_name in asset_name_all:
        # drop missing values (keyword `how` works on old and new xarray):
        target_cur = target_all.sel(asset=asset_name).dropna('time', how='any')
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')

        # align features and targets:
        target_for_learn_df, feature_for_learn_df = xr.align(
            target_cur, features_cur, join='inner'
        )

        # check the aligned set, since that is what the model is fitted on
        if len(feature_for_learn_df.time) < 10:
            # not enough points for training
            continue

        model = create_model()

        try:
            model.fit(feature_for_learn_df.values, target_for_learn_df)
            models[asset_name] = model
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses,
            # so they still propagate and stop the run.
            logging.exception('model training failed')

    return models


def predict(models, data):
    """Perform prediction and generate output weights.

    Generation is performed for several days at once in order to speed
    up the evaluation.

    Args:
        models: dict mapping asset name to a fitted model, as returned by
            create_and_train_models.
        data: market data with 'time', 'field' and 'asset' dimensions.

    Returns:
        Weights shaped like the 'close' field of data: the predicted
        class where a prediction was made, 0 elsewhere.
    """
    asset_name_all = data.coords['asset'].values
    weights = xr.zeros_like(data.sel(field='close'))

    if not models:
        # nothing was trained: skip the feature computation entirely
        return weights

    # The features do not depend on the asset being processed, so compute
    # them once instead of once per asset.
    features_all = get_features(data)

    for asset_name in asset_name_all:
        if asset_name not in models:
            continue

        model = models[asset_name]
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')
        if len(features_cur.time) < 1:
            continue

        try:
            weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = \
                model.predict(features_cur.values)
        except Exception:
            # KeyboardInterrupt/SystemExit still propagate.
            logging.exception('model prediction failed')

    return weights


weights = qnbt.backtest_ml(
    train=create_and_train_models,
    predict=predict,
    train_period=10*365,              # the data length for training in calendar days
    retrain_interval=365,             # how often we have to retrain models (calendar days)
    retrain_interval_after_submit=1,  # how often retrain models after submission during evaluation (calendar days)
    predict_each_day=False,           # Is it necessary to call prediction for every day during backtesting?
                                      # Set it to true if you suspect that get_features is looking forward.
    competition_type='cryptofutures', # competition type
    lookback_period=365,              # how many calendar days are needed by the predict function to generate the output
    start_date='2014-01-01',          # backtest start date
    build_plots=True                  # do you need the chart?
)
I would like to use it for futures markets, too.
Simply changing competition_type to 'futures', as in the code below, does not seem to work:
import logging

import numpy as np
import pandas as pd
import xarray as xr

import qnt.backtester as qnbt
import qnt.ta as qnta


def create_model():
    """Construct the ML model used for every asset.

    The model is a VotingClassifier combining 42 RidgeClassifier and
    42 SGDClassifier instances, each seeded from a fixed random stream
    (seed 13) so that the ensemble is reproducible. Using several random
    seeds is intended to reduce overfitting. Swap the estimators or the
    combination logic here to experiment with other models.

    Returns:
        An unfitted sklearn VotingClassifier.
    """
    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import SGDClassifier, RidgeClassifier
    import random

    max_seed = 2 ** 32 - 1
    r = random.Random(13)

    classifiers = []
    for i in range(42):
        # The ridge seed is drawn before the sgd seed in every iteration;
        # keep this order so the seed sequence stays reproducible.
        classifiers.append(
            ('ridge' + str(i), RidgeClassifier(random_state=r.randint(0, max_seed)))
        )
        classifiers.append(
            ('sgd' + str(i), SGDClassifier(random_state=r.randint(0, max_seed)))
        )

    return VotingClassifier(classifiers)


def get_features(data):
    """Build the features used for learning.

    The features are:
        * a trend indicator (1-period rate of change of a 70-period LWMA
          of the close);
        * the stochastic oscillator (the ``d`` line, period 14);
        * volatility (true range relative to close, smoothed by a
          14-period LWMA);
        * volume (ratio of the 5-period SMA to the 60-period SMA, with
          non-finite values replaced by 0).

    Args:
        data: market data with a 'field' dimension providing 'close',
            'high', 'low' and 'vol'.

    Returns:
        The four features concatenated along 'field' and transposed to
        ('time', 'field', 'asset').
    """
    close = data.sel(field='close')
    high = data.sel(field='high')
    low = data.sel(field='low')

    trend = qnta.roc(qnta.lwma(close, 70), 1)

    # stochastic oscillator (only the d line is used as a feature):
    _, d = qnta.stochastic(high, low, close, 14)

    volatility = qnta.tr(high, low, close)
    volatility = volatility / close
    volatility = qnta.lwma(volatility, 14)

    volume = data.sel(field='vol')
    volume = qnta.sma(volume, 5) / qnta.sma(volume, 60)
    # a zero or missing long-term average gives inf/NaN: treat it as 0
    volume = volume.where(np.isfinite(volume), 0)

    # combine the selected four features:
    result = xr.concat(
        [trend, d, volatility, volume],
        pd.Index(
            ['trend', 'stochastic_d', 'volatility', 'volume'],
            name='field'
        )
    )

    return result.transpose('time', 'field', 'asset')


def get_target_classes(data):
    """Build the target classes which will be later predicted.

    Args:
        data: market data with a 'field' dimension providing 'close'.

    Returns:
        1 where the close shifted by -1 (the next close) is greater than
        the current close, 0 otherwise.
    """
    price_current = data.sel(field='close')
    price_future = qnta.shift(price_current, -1)

    class_positive = 1
    class_negative = 0

    # NOTE(review): a comparison with NaN is False, so any bar without a
    # next price (presumably the last one after the shift) is labelled
    # class_negative rather than being dropped -- confirm this is intended.
    target_is_price_up = xr.where(
        price_future > price_current, class_positive, class_negative
    )

    return target_is_price_up


def create_and_train_models(data):
    """Create and train the models working on an asset-by-asset basis.

    Args:
        data: market data with 'time', 'field' and 'asset' dimensions.

    Returns:
        A dict mapping asset name to its fitted model. Assets with fewer
        than 10 usable samples, or whose training raised, are omitted.
    """
    asset_name_all = data.coords['asset'].values

    # cut the noisy data head before 2013-05-01
    data = data.sel(time=slice('2013-05-01', None))

    features_all = get_features(data)
    target_all = get_target_classes(data)

    models = dict()

    for asset_name in asset_name_all:
        # drop missing values (keyword `how` works on old and new xarray):
        target_cur = target_all.sel(asset=asset_name).dropna('time', how='any')
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')

        # align features and targets:
        target_for_learn_df, feature_for_learn_df = xr.align(
            target_cur, features_cur, join='inner'
        )

        # check the aligned set, since that is what the model is fitted on
        if len(feature_for_learn_df.time) < 10:
            # not enough points for training
            continue

        model = create_model()

        try:
            model.fit(feature_for_learn_df.values, target_for_learn_df)
            models[asset_name] = model
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses,
            # so they still propagate and stop the run.
            logging.exception('model training failed')

    return models


def predict(models, data):
    """Perform prediction and generate output weights.

    Generation is performed for several days at once in order to speed
    up the evaluation.

    Args:
        models: dict mapping asset name to a fitted model, as returned by
            create_and_train_models.
        data: market data with 'time', 'field' and 'asset' dimensions.

    Returns:
        Weights shaped like the 'close' field of data: the predicted
        class where a prediction was made, 0 elsewhere.
    """
    asset_name_all = data.coords['asset'].values
    weights = xr.zeros_like(data.sel(field='close'))

    if not models:
        # nothing was trained: skip the feature computation entirely
        return weights

    # The features do not depend on the asset being processed, so compute
    # them once instead of once per asset.
    features_all = get_features(data)

    for asset_name in asset_name_all:
        if asset_name not in models:
            continue

        model = models[asset_name]
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')
        if len(features_cur.time) < 1:
            continue

        try:
            weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = \
                model.predict(features_cur.values)
        except Exception:
            # KeyboardInterrupt/SystemExit still propagate.
            logging.exception('model prediction failed')

    return weights


weights = qnbt.backtest_ml(
    train=create_and_train_models,
    predict=predict,
    train_period=10*365,              # the data length for training in calendar days
    retrain_interval=365,             # how often we have to retrain models (calendar days)
    retrain_interval_after_submit=1,  # how often retrain models after submission during evaluation (calendar days)
    predict_each_day=False,           # Is it necessary to call prediction for every day during backtesting?
                                      # Set it to true if you suspect that get_features is looking forward.
    competition_type='futures',       # competition type
    lookback_period=365,              # how many calendar days are needed by the predict function to generate the output
    start_date='2014-01-01',          # backtest start date
    build_plots=True                  # do you need the chart?
)
-
@magenta-grimer Sorry, it now seems to work as it is.