strategy

Predicting BTC Futures Using IMF Data

This example shows how to use data from the International Monetary Fund (IMF) to predict Bitcoin futures prices.

You can clone and edit this example from the Examples tab.


Strategy idea: We will open BTC crypto futures positions as predicted by a RidgeClassifier.

Features for learning:

The 5 best features are selected using RFE (from sklearn.feature_selection import RFE).

Cryptofutures data:

  • a trend indicator;
  • the stochastic oscillator;
  • volatility;
  • volume.

IMF Commodity Data (monthly data)

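  • sma: the ratio of the 30-period to the 60-period simple moving average of each commodity series;
  • log: the natural logarithm of each commodity series.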
In [1]:
%%javascript
window.IPython && (IPython.OutputArea.prototype._should_scroll = function(lines) { return false; })
// disable widget scrolling
In [2]:
import logging

import pandas as pd
import xarray as xr
import numpy as np

import qnt.data    as qndata  # load and manipulate data
import qnt.backtester as qnbt  # backtester
import qnt.stats   as qnstats  # statistical functions for analysis
import qnt.ta      as qnta  # indicators library


def load_data(period):
    """Load the crypto futures data and the IMF commodity data.

    The commodity data is re-indexed onto the time coordinate of the crypto
    futures data, forward-filling along time, so that both datasets can be
    selected with the same timestamps.

    :param period: forwarded as ``tail`` to both qnt loaders.
    :return: a tuple ``(data, time_values)``; ``data`` is a dict with the keys
        ``'commodity'`` and ``'crypto_futures'``, ``time_values`` are the
        values of the crypto futures time coordinate.
    """

    def _onto_time_axis(reference, series):
        # Outer-align so every reference timestamp exists in ``series``,
        # forward-fill along time, then keep only the reference timestamps.
        _, widened = xr.align(reference.time, series, join='outer')
        filled = widened.ffill(dim='time')
        return filled.sel(time=reference.time)

    futures = qndata.cryptofutures_load_data(tail=period)
    imf_raw = qndata.imf_load_commodity_data(tail=period)
    imf_aligned = _onto_time_axis(futures, imf_raw)

    bundle = {'commodity': imf_aligned, 'crypto_futures': futures}
    return bundle, futures.time.values


def window(data, max_date: np.datetime64, lookback_period: int):
    """Cut both datasets down to the lookback window ending at ``max_date``.

    :param data: dict with the keys ``'crypto_futures'`` and ``'commodity'``.
    :param max_date: last timestamp of the window.
    :param lookback_period: window length in days, counted back from
        ``max_date``.
    :return: a new dict with the same two keys, each holding a deep copy of
        the dataset selected on ``time`` between the two dates.
    """
    min_date = max_date - np.timedelta64(lookback_period, 'D')
    span = slice(min_date, max_date)

    def _clip(key):
        # Copy first, then select, so the caller's dataset is never touched.
        return data[key].copy(True).sel(time=span)

    return {key: _clip(key) for key in ('crypto_futures', 'commodity')}


def create_model():
    """Build the untrained estimator that is fitted separately for each asset.

    :return: an ``RFE`` selector wrapping a ``RidgeClassifier`` (seeded with
        ``random_state=18``) and configured to select 5 features.
    """
    from sklearn.feature_selection import RFE
    from sklearn.linear_model import RidgeClassifier

    n_kept_features = 5
    return RFE(RidgeClassifier(random_state=18), n_features_to_select=n_kept_features)


def get_features(futures_commodity):
    """Assemble the feature array used for both training and prediction.

    Four features are derived from the crypto futures data (``'trend'``,
    ``'stochastic_d'``, ``'volatility'``, ``'volume'``) and two families are
    derived from the commodity data (a 30/60 SMA ratio and the logarithm).
    Everything is stacked along the ``'field'`` dimension.

    :param futures_commodity: dict with the keys ``'crypto_futures'`` and
        ``'commodity'``.
    :return: the combined features transposed to ``('time', 'field', 'asset')``.
    """
    futures = futures_commodity['crypto_futures']
    close = futures.sel(field='close')
    high = futures.sel(field='high')
    low = futures.sel(field='low')

    # trend: 1-period qnta.roc of the 70-period qnta.lwma of the close
    trend = qnta.roc(qnta.lwma(close, 70), 1)

    # stochastic oscillator: only the second returned series is used
    _, stochastic_d = qnta.stochastic(high, low, close, 14)

    # volatility: qnta.tr relative to the close, smoothed with a 14-period lwma
    relative_range = qnta.tr(high, low, close) / close
    volatility = qnta.lwma(relative_range, 14)

    # volume: short (5) over long (60) SMA; non-finite ratios become 0
    raw_volume = futures.sel(field='vol')
    volume_ratio = qnta.sma(raw_volume, 5) / qnta.sma(raw_volume, 60)
    volume_ratio = volume_ratio.where(np.isfinite(volume_ratio), 0)

    crypto_field_index = pd.Index(
        ['trend', 'stochastic_d', 'volatility', 'volume'],
        name='field'
    )
    crypto_features = xr.concat(
        [trend, stochastic_d, volatility, volume_ratio],
        crypto_field_index
    )

    # Each commodity series becomes a 'field' entry instead of an 'asset'.
    commodity = futures_commodity['commodity'].rename({'asset': 'field'})
    sma_ratio = qnta.sma(commodity, 30) / qnta.sma(commodity, 60)
    log_level = np.log(commodity)
    commodity_features = xr.concat([sma_ratio, log_level], dim='field')

    # Keep only the timestamps present in the crypto features before stacking.
    commodity_on_crypto_time = commodity_features.sel(time=crypto_features.time)
    combined = xr.concat([crypto_features, commodity_on_crypto_time], dim='field')
    return combined.transpose('time', 'field', 'asset')


def get_target_classes(futures_commodity):
    """Builds target classes which will be later predicted.

    The class is 1 when the next close is above the current close and 0
    otherwise. Where either price is missing the target is NaN, so that the
    caller's ``dropna`` removes the sample instead of training on a made-up
    label.

    :param futures_commodity: dict holding the ``'crypto_futures'`` dataset.
    :return: array of 1.0 / 0.0 / NaN with the same dimensions as the
        ``'close'`` field.
    """
    data = futures_commodity['crypto_futures']

    price_current = data.sel(field='close')
    # NOTE(review): qnta.shift(..., -1) presumably leaves NaN at the last
    # timestamp, where no next price exists yet - confirm against qnt.ta.
    price_future = qnta.shift(price_current, -1)

    class_positive = 1
    class_negative = 0

    target_is_price_up = xr.where(price_future > price_current, class_positive, class_negative)

    # Any comparison with NaN is False, so without this mask an unknown
    # outcome would be labelled as "price did not go up" (class 0).
    is_outcome_known = price_current.notnull() & price_future.notnull()
    return target_is_price_up.where(is_outcome_known)


def create_and_train_models(futures_commodity):
    """Create and train the models working on an asset-by-asset basis.

    :param futures_commodity: dict with the keys ``'crypto_futures'`` and
        ``'commodity'``.
    :return: dict mapping asset name to its fitted model. Assets with fewer
        than 10 usable samples, or whose training failed, are left out.
    """

    features_all = get_features(futures_commodity)
    target_all = get_target_classes(futures_commodity)

    models = dict()
    asset_name_all = futures_commodity['crypto_futures'].coords['asset'].values
    for asset_name in asset_name_all:

        # drop missing values (``how`` is passed by keyword: recent xarray
        # releases declare it keyword-only):
        target_for_asset = target_all.sel(asset=asset_name).dropna('time', how='any')
        features_for_asset = features_all.sel(asset=asset_name).dropna('time', how='any')

        # align features and targets:
        target_for_learn_df, feature_for_learn_df = xr.align(target_for_asset,
                                                             features_for_asset,
                                                             join='inner')

        is_few_data_for_train = len(target_for_learn_df.time) < 10
        if is_few_data_for_train:
            continue

        model = create_model()

        try:
            model.fit(feature_for_learn_df.values, target_for_learn_df.values)
        except Exception:
            # One failing asset must not stop the others. KeyboardInterrupt
            # and SystemExit are not Exception subclasses, so they propagate.
            logging.exception('model training failed')
        else:
            # Register the model only after a successful fit.
            models[asset_name] = model

    return models


def predict(models, futures_commodity):
    """Performs prediction and generates output weights.
       Generation is performed for several days in order to speed
       up the evaluation.

    :param models: dict mapping asset name to a fitted model.
    :param futures_commodity: dict with the keys ``'crypto_futures'`` and
        ``'commodity'``.
    :return: weights shaped like the ``'close'`` field; zero wherever no
        prediction was made.
    """
    data = futures_commodity['crypto_futures']
    weights = xr.zeros_like(data.sel(field='close'))

    asset_names = [name for name in data.coords['asset'].values if name in models]
    if not asset_names:
        # Nothing to predict: skip the feature computation altogether.
        return weights

    # The features do not depend on the asset being processed, so they are
    # computed once instead of once per asset.
    features_all = get_features(futures_commodity)

    for asset_name in asset_names:
        # ``how`` is passed by keyword: recent xarray releases declare it
        # keyword-only.
        features_cur = features_all.sel(asset=asset_name).dropna('time', how='any')
        if len(features_cur.time) < 1:
            continue
        try:
            weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = models[asset_name].predict(
                features_cur.values)
        except Exception:
            # One failing asset must not stop the others. KeyboardInterrupt
            # and SystemExit are not Exception subclasses, so they propagate.
            logging.exception('model prediction failed')

    return weights


# Run the ML backtest with the train/predict/load/window callbacks defined in
# this file; the returned output is bound to `weights`.
weights = qnbt.backtest_ml(
    train=create_and_train_models,  # builds and fits one model per asset
    predict=predict,  # turns the fitted models into output weights
    train_period=10 * 365,  # the data length for training in calendar days
    retrain_interval=365,  # how often the models are retrained (calendar days)
    retrain_interval_after_submit=1,  # how often the models are retrained after submission, during evaluation (calendar days)
    predict_each_day=False,  # whether prediction is called for every day during backtesting.
    # Set it to True if you suspect that get_features is looking forward.
    competition_type='cryptofutures',  # competition type
    lookback_period=365,  # how many calendar days are needed by the predict function to generate the output
    start_date='2014-01-01',  # backtest start date
    build_plots=True,  # whether to build the chart
    load_data=load_data,  # custom loader: returns the data dict and the time values
    window=window,  # custom slicer: cuts the data dict down to the lookback window
)
Run the last iteration...
100% (219152 of 219152) |################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (272368 of 272368) |################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (4172 of 4172) |####################| Elapsed Time: 0:00:00 Time:  0:00:00
Output cleaning...
fix uniq
ffill if the current price is None...
Check missed dates...
Ok.
Normalization...
Output cleaning is complete.
Write output: /root/fractions.nc.gz
State saved.
---
Run First Iteration...
100% (17072 of 17072) |##################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (191188 of 191188) |################| Elapsed Time: 0:00:00 Time:  0:00:00
---
Run all iterations...
Load data...
100% (242132 of 242132) |################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (225632 of 225632) |################| Elapsed Time: 0:00:00 Time:  0:00:00
Backtest...
100% (229232 of 229232) |################| Elapsed Time: 0:00:00 Time:  0:00:00
Output cleaning...
fix uniq
ffill if the current price is None...
Check missed dates...
Ok.
Normalization...
Output cleaning is complete.
Write output: /root/fractions.nc.gz
State saved.
---
Analyze results...
Check...
Check missed dates...
Ok.
Check the sharpe ratio...
Period: 2014-01-01 - 2024-04-16
Sharpe Ratio = 0.9320733791828499
ERROR! The Sharpe Ratio is too low. 0.9320733791828499 < 1
Improve the strategy and make sure that the in-sample Sharpe Ratio more than 1.
Check correlation.
WARNING! Can't calculate correlation.
Correlation check failed.
---
Align...
Calc global stats...
---
Calc stats per asset...
Build plots...
---
Output:
asset BTC
time
2024-04-07 0.0
2024-04-08 0.0
2024-04-09 0.0
2024-04-10 0.0
2024-04-11 0.0
2024-04-12 0.0
2024-04-13 0.0
2024-04-14 0.0
2024-04-15 0.0
2024-04-16 0.0
Stats:
field equity relative_return volatility underwater max_drawdown sharpe_ratio mean_return bias instruments avg_turnover avg_holding_time
time
2024-04-07 83.614781 0.0 0.578075 -0.099984 -0.687471 0.933714 0.539757 0.0 1.0 0.034521 134.894737
2024-04-08 83.614781 0.0 0.577998 -0.099984 -0.687471 0.933532 0.539579 0.0 1.0 0.034512 134.894737
2024-04-09 83.614781 0.0 0.577921 -0.099984 -0.687471 0.933349 0.539402 0.0 1.0 0.034503 134.894737
2024-04-10 83.614781 0.0 0.577844 -0.099984 -0.687471 0.933166 0.539225 0.0 1.0 0.034494 134.894737
2024-04-11 83.614781 0.0 0.577767 -0.099984 -0.687471 0.932984 0.539047 0.0 1.0 0.034485 134.894737
2024-04-12 83.614781 0.0 0.577690 -0.099984 -0.687471 0.932802 0.538870 0.0 1.0 0.034475 134.894737
2024-04-13 83.614781 0.0 0.577613 -0.099984 -0.687471 0.932619 0.538694 0.0 1.0 0.034466 134.894737
2024-04-14 83.614781 0.0 0.577537 -0.099984 -0.687471 0.932437 0.538517 0.0 1.0 0.034457 134.894737
2024-04-15 83.614781 0.0 0.577460 -0.099984 -0.687471 0.932255 0.538340 0.0 1.0 0.034448 134.894737
2024-04-16 83.614781 0.0 0.577383 -0.099984 -0.687471 0.932073 0.538163 0.0 1.0 0.034439 134.894737
---
100% (3752 of 3752) |####################| Elapsed Time: 0:01:11 Time:  0:01:11