Navigation

    Quantiacs Community

    • Register
    • Login
    • Search
    • Categories
    • News
    • Recent
    • Tags
    • Popular
    • Users
    • Groups

    Help me in using Voting Classifier ML on Futures

    Support
    1
    2
    171
    Loading More Posts
    • Oldest to Newest
    • Newest to Oldest
    • Most Votes
    Reply
    • Reply as topic
    Log in to reply
    This topic has been deleted. Only users with topic management privileges can see it.
    • magenta.grimer
      magenta.grimer last edited by

      Hi!

      This is the default voting classifier strategy for cryptos:

      import logging
      
      import pandas as pd
      import xarray as xr
      import numpy as np
      
      import qnt.backtester as qnbt
      import qnt.ta as qnta
      
      def create_model():
          """Build the voting ensemble used as the ML model.

          The ensemble contains 42 pairs of RidgeClassifier / SGDClassifier.
          Every member gets its own random state drawn from a generator that
          is always seeded with 13, so each call produces the same seeds.
          Swap the members or the combination logic here to try other models.
          """

          import random

          from sklearn.ensemble import VotingClassifier
          from sklearn.linear_model import RidgeClassifier, SGDClassifier

          seed_source = random.Random(13)
          max_seed = 2 ** 32 - 1

          members = []
          for index in range(42):
              # Seeds are drawn ridge first, then sgd; keep that order so the
              # sequence of random states stays the same.
              ridge_seed = seed_source.randint(0, max_seed)
              members.append((f'ridge{index}', RidgeClassifier(random_state=ridge_seed)))

              sgd_seed = seed_source.randint(0, max_seed)
              members.append((f'sgd{index}', SGDClassifier(random_state=sgd_seed)))

          return VotingClassifier(members)
      
      def get_features(data):
          """Compute the four features used for learning.

          Reads the 'close', 'high', 'low' and 'vol' fields of ``data`` and
          builds:
             * trend: qnta.roc(..., 1) of qnta.lwma(close, 70);
             * stochastic_d: second output of qnta.stochastic(high, low, close, 14);
             * volatility: qnta.lwma(..., 14) of qnta.tr(high, low, close) / close;
             * volume: qnta.sma(vol, 5) / qnta.sma(vol, 60), with non-finite
               values replaced by 0.

          Returns the features concatenated along a new 'field' dimension and
          transposed to ('time', 'field', 'asset').
          """

          close = data.sel(field='close')
          high = data.sel(field='high')
          low = data.sel(field='low')
          vol = data.sel(field='vol')

          trend = qnta.roc(qnta.lwma(close, 70), 1)

          # stochastic oscillator; only its second output is used as a feature:
          _, stochastic_d = qnta.stochastic(high, low, close, 14)

          volatility = qnta.lwma(qnta.tr(high, low, close) / close, 14)

          volume_ratio = qnta.sma(vol, 5) / qnta.sma(vol, 60)
          volume_ratio = volume_ratio.where(np.isfinite(volume_ratio), 0)

          # stack the selected four features under their field names:
          field_index = pd.Index(
              ['trend', 'stochastic_d', 'volatility', 'volume'],
              name='field'
          )
          stacked = xr.concat([trend, stochastic_d, volatility, volume_ratio], field_index)

          return stacked.transpose('time', 'field', 'asset')
      
      def get_target_classes(data):
          """Builds target classes which will be later predicted.

          The target is 1 where the close shifted by -1 (presumably the next
          period's close -- confirm against qnta.shift) is above the current
          close, and 0 otherwise. Entries where either price is missing are
          returned as NaN so that callers can drop them; as a consequence the
          result is floating point rather than integer.
          """

          price_current = data.sel(field='close')
          price_future = qnta.shift(price_current, -1)

          class_positive = 1
          class_negative = 0

          target_is_price_up = xr.where(price_future > price_current, class_positive, class_negative)

          # A comparison involving NaN evaluates to False, so without this mask
          # every sample with an unknown price would silently be labelled
          # class_negative and used for training.
          is_known = price_current.notnull() & price_future.notnull()

          return target_is_price_up.where(is_known)
      
      def create_and_train_models(data):
          """Create and train the models working on an asset-by-asset basis.

          Data before 2013-05-01 is discarded, features and targets are built
          once for all assets, and a fresh model from create_model() is fitted
          per asset on the time steps where both features and target are
          present.

          Returns a dict mapping asset name to its fitted model. Assets with
          fewer than 10 training samples, or whose training raised an
          exception (which is logged), are left out.
          """

          asset_name_all = data.coords['asset'].values

          data = data.sel(time=slice('2013-05-01', None))  # cut the noisy data head before 2013-05-01

          features_all = get_features(data)
          target_all = get_target_classes(data)

          models = dict()

          for asset_name in asset_name_all:

              # drop missing values (``how`` is passed by keyword because newer
              # xarray versions deprecate passing it positionally):
              target_cur = target_all.sel(asset=asset_name).dropna('time', how='any')
              features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')

              # align features and targets:
              target_for_learn_df, feature_for_learn_df = xr.align(target_cur, features_cur, join='inner')

              # Count the samples that will actually be used for fitting, i.e.
              # after the inner alignment, not the features on their own.
              if len(feature_for_learn_df.time) < 10:
                  # not enough points for training
                  continue

              model = create_model()

              try:
                  model.fit(feature_for_learn_df.values, target_for_learn_df)
              except Exception:
                  # Best effort: one failing asset must not stop the others.
                  # KeyboardInterrupt and SystemExit are not Exception
                  # subclasses, so they still propagate.
                  logging.exception('model training failed')
              else:
                  models[asset_name] = model

          return models
      
      def predict(models, data):
          """Performs prediction and generates output weights.

          Generation is performed for several days in order to speed
          up the evaluation.

          ``models`` maps asset name to a fitted model. The returned weights
          have the shape of the 'close' field of ``data``; they start at zero
          and are overwritten with the model prediction for every asset that
          has a model, on the time steps where all features are present.
          Prediction failures are logged and leave that asset's weights at zero.
          """

          asset_name_all = data.coords['asset'].values
          weights = xr.zeros_like(data.sel(field='close'))

          assets_with_model = [asset_name for asset_name in asset_name_all if asset_name in models]
          if not assets_with_model:
              # nothing to predict, so skip the feature computation entirely
              return weights

          # The features do not depend on the asset being processed, so they are
          # computed once here instead of once per asset inside the loop.
          features_all = get_features(data)

          for asset_name in assets_with_model:
              model = models[asset_name]
              # ``how`` is passed by keyword because newer xarray versions
              # deprecate passing it positionally.
              features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')
              if len(features_cur.time) < 1:
                  continue
              try:
                  weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = model.predict(features_cur.values)
              except Exception:
                  # Best effort: one failing asset must not stop the others.
                  # KeyboardInterrupt and SystemExit are not Exception
                  # subclasses, so they still propagate.
                  logging.exception('model prediction failed')

          return weights
      
      
      # Run the ML backtest with the train/predict functions defined above.
      weights = qnbt.backtest_ml(
          # what is being backtested
          competition_type='cryptofutures',  # competition type
          train=create_and_train_models,
          predict=predict,
          # time windows (all in calendar days)
          start_date='2014-01-01',  # first day of the backtest
          lookback_period=365,      # history the predict function needs to produce its output
          train_period=10 * 365,    # amount of data handed to the training function
          retrain_interval=365,     # retraining frequency during the backtest
          retrain_interval_after_submit=1,  # retraining frequency after submission, during evaluation
          # Whether prediction has to be called for every single day.
          # Switch to True if you suspect that get_features is looking forward.
          predict_each_day=False,
          build_plots=True          # draw the chart?
      )
      

      I would like to use it for futures markets, too.

      Simply changing the competition type to 'futures', as in the code below, does not seem to work:

      import logging
      
      import pandas as pd
      import xarray as xr
      import numpy as np
      
      import qnt.backtester as qnbt
      import qnt.ta as qnta
      
      def create_model():
          """Build the voting ensemble used as the ML model.

          The ensemble contains 42 pairs of RidgeClassifier / SGDClassifier.
          Every member gets its own random state drawn from a generator that
          is always seeded with 13, so each call produces the same seeds.
          Swap the members or the combination logic here to try other models.
          """

          import random

          from sklearn.ensemble import VotingClassifier
          from sklearn.linear_model import RidgeClassifier, SGDClassifier

          seed_source = random.Random(13)
          max_seed = 2 ** 32 - 1

          members = []
          for index in range(42):
              # Seeds are drawn ridge first, then sgd; keep that order so the
              # sequence of random states stays the same.
              ridge_seed = seed_source.randint(0, max_seed)
              members.append((f'ridge{index}', RidgeClassifier(random_state=ridge_seed)))

              sgd_seed = seed_source.randint(0, max_seed)
              members.append((f'sgd{index}', SGDClassifier(random_state=sgd_seed)))

          return VotingClassifier(members)
      
      def get_features(data):
          """Compute the four features used for learning.

          Reads the 'close', 'high', 'low' and 'vol' fields of ``data`` and
          builds:
             * trend: qnta.roc(..., 1) of qnta.lwma(close, 70);
             * stochastic_d: second output of qnta.stochastic(high, low, close, 14);
             * volatility: qnta.lwma(..., 14) of qnta.tr(high, low, close) / close;
             * volume: qnta.sma(vol, 5) / qnta.sma(vol, 60), with non-finite
               values replaced by 0.

          Returns the features concatenated along a new 'field' dimension and
          transposed to ('time', 'field', 'asset').
          """

          close = data.sel(field='close')
          high = data.sel(field='high')
          low = data.sel(field='low')
          vol = data.sel(field='vol')

          trend = qnta.roc(qnta.lwma(close, 70), 1)

          # stochastic oscillator; only its second output is used as a feature:
          _, stochastic_d = qnta.stochastic(high, low, close, 14)

          volatility = qnta.lwma(qnta.tr(high, low, close) / close, 14)

          volume_ratio = qnta.sma(vol, 5) / qnta.sma(vol, 60)
          volume_ratio = volume_ratio.where(np.isfinite(volume_ratio), 0)

          # stack the selected four features under their field names:
          field_index = pd.Index(
              ['trend', 'stochastic_d', 'volatility', 'volume'],
              name='field'
          )
          stacked = xr.concat([trend, stochastic_d, volatility, volume_ratio], field_index)

          return stacked.transpose('time', 'field', 'asset')
      
      def get_target_classes(data):
          """Builds target classes which will be later predicted.

          The target is 1 where the close shifted by -1 (presumably the next
          period's close -- confirm against qnta.shift) is above the current
          close, and 0 otherwise. Entries where either price is missing are
          returned as NaN so that callers can drop them; as a consequence the
          result is floating point rather than integer.
          """

          price_current = data.sel(field='close')
          price_future = qnta.shift(price_current, -1)

          class_positive = 1
          class_negative = 0

          target_is_price_up = xr.where(price_future > price_current, class_positive, class_negative)

          # A comparison involving NaN evaluates to False, so without this mask
          # every sample with an unknown price would silently be labelled
          # class_negative and used for training.
          is_known = price_current.notnull() & price_future.notnull()

          return target_is_price_up.where(is_known)
      
      def create_and_train_models(data):
          """Create and train the models working on an asset-by-asset basis.

          Data before 2013-05-01 is discarded, features and targets are built
          once for all assets, and a fresh model from create_model() is fitted
          per asset on the time steps where both features and target are
          present.

          Returns a dict mapping asset name to its fitted model. Assets with
          fewer than 10 training samples, or whose training raised an
          exception (which is logged), are left out.
          """

          asset_name_all = data.coords['asset'].values

          data = data.sel(time=slice('2013-05-01', None))  # cut the noisy data head before 2013-05-01

          features_all = get_features(data)
          target_all = get_target_classes(data)

          models = dict()

          for asset_name in asset_name_all:

              # drop missing values (``how`` is passed by keyword because newer
              # xarray versions deprecate passing it positionally):
              target_cur = target_all.sel(asset=asset_name).dropna('time', how='any')
              features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')

              # align features and targets:
              target_for_learn_df, feature_for_learn_df = xr.align(target_cur, features_cur, join='inner')

              # Count the samples that will actually be used for fitting, i.e.
              # after the inner alignment, not the features on their own.
              if len(feature_for_learn_df.time) < 10:
                  # not enough points for training
                  continue

              model = create_model()

              try:
                  model.fit(feature_for_learn_df.values, target_for_learn_df)
              except Exception:
                  # Best effort: one failing asset must not stop the others.
                  # KeyboardInterrupt and SystemExit are not Exception
                  # subclasses, so they still propagate.
                  logging.exception('model training failed')
              else:
                  models[asset_name] = model

          return models
      
      def predict(models, data):
          """Performs prediction and generates output weights.

          Generation is performed for several days in order to speed
          up the evaluation.

          ``models`` maps asset name to a fitted model. The returned weights
          have the shape of the 'close' field of ``data``; they start at zero
          and are overwritten with the model prediction for every asset that
          has a model, on the time steps where all features are present.
          Prediction failures are logged and leave that asset's weights at zero.
          """

          asset_name_all = data.coords['asset'].values
          weights = xr.zeros_like(data.sel(field='close'))

          assets_with_model = [asset_name for asset_name in asset_name_all if asset_name in models]
          if not assets_with_model:
              # nothing to predict, so skip the feature computation entirely
              return weights

          # The features do not depend on the asset being processed, so they are
          # computed once here instead of once per asset inside the loop.
          features_all = get_features(data)

          for asset_name in assets_with_model:
              model = models[asset_name]
              # ``how`` is passed by keyword because newer xarray versions
              # deprecate passing it positionally.
              features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')
              if len(features_cur.time) < 1:
                  continue
              try:
                  weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = model.predict(features_cur.values)
              except Exception:
                  # Best effort: one failing asset must not stop the others.
                  # KeyboardInterrupt and SystemExit are not Exception
                  # subclasses, so they still propagate.
                  logging.exception('model prediction failed')

          return weights
      
      
      # Run the ML backtest with the train/predict functions defined above.
      weights = qnbt.backtest_ml(
          # what is being backtested
          competition_type='futures',  # competition type
          train=create_and_train_models,
          predict=predict,
          # time windows (all in calendar days)
          start_date='2014-01-01',  # first day of the backtest
          lookback_period=365,      # history the predict function needs to produce its output
          train_period=10 * 365,    # amount of data handed to the training function
          retrain_interval=365,     # retraining frequency during the backtest
          retrain_interval_after_submit=1,  # retraining frequency after submission, during evaluation
          # Whether prediction has to be called for every single day.
          # Switch to True if you suspect that get_features is looking forward.
          predict_each_day=False,
          build_plots=True          # draw the chart?
      )
      
      magenta.grimer 1 Reply Last reply Reply Quote 0
      • magenta.grimer
        magenta.grimer @magenta.grimer last edited by

        @magenta-grimer Sorry, it now seems to work as it is.

        1 Reply Last reply Reply Quote 0
        • First post
          Last post
        Powered by NodeBB | Contributors
        • Documentation
        • About
        • Career
        • My account
        • Privacy policy
        • Terms and Conditions
        • Cookies policy
        Home
        Copyright © 2014 - 2021 Quantiacs LLC.
        Powered by NodeBB | Contributors