{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 1) Data Preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "algo_name='algo_sma'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s3bucket=!(aws cloudformation list-exports --query \"Exports[?Name=='algotrading-s3bucket'].Value\" --output text)\n",
    "s3bucket=s3bucket[0]\n",
    "s3bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pyathena import connect\n",
    "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/')\n",
    "\n",
    "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n",
    "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n",
    "del df['dt']\n",
    "# The query has no ORDER BY, so row order is not guaranteed; sort chronologically\n",
    "# because the positional split and the backtest feed below rely on time order.\n",
    "df = df.sort_index()\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "# First 40% of the rows go to dfTrain, the remaining 60% to dfTest.\n",
    "trainCount=int(len(df)*0.4)\n",
    "dfTrain = df.iloc[:trainCount]\n",
    "dfTest = df.iloc[trainCount:]\n",
    "\n",
    "# NOTE(review): dfTrain is never used below; every backtest and the tuning job\n",
    "# run on dfTest, which is the file written here - confirm this is intended.\n",
    "dfTest.to_csv('/opt/ml/input/data/training/data.csv')\n",
    "dfTest.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "dfTest[\"close\"].plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 2) Modify Strategy Configuration\n",
    "\n",
    "In the following cell, you can adjust the parameters for the strategy.\n",
    "\n",
    "* `fast_period` = Fast period for the moving average indicator, in bars (days for the daily data used here; e.g. 8)\n",
    "* `slow_period` = Slow period for the moving average indicator, in bars (days for the daily data used here; e.g. 21)\n",
    "* `size` = The number of shares for a transaction (e.g. 100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%writefile /opt/ml/input/config/hyperparameters.json\n",
    "{ \"fast_period\" : \"8\",\n",
    "  \"slow_period\" : \"21\",\n",
    "  \"size\" : \"100\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run /opt/program/update_config.py $algo_name $s3bucket"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 3) Modify Strategy Code\n",
    "\n",
    "Here are some helpful links:\n",
    "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n",
    "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n",
    "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%writefile /opt/program/{algo_name}.py\n",
    "import backtrader as bt\n",
    "from algo_base import *\n",
    "import pytz\n",
    "from pytz import timezone\n",
    "\n",
    "class MyStrategy(StrategyTemplate):\n",
    "    \"\"\"EMA crossover: hold long while the fast EMA is above the slow EMA, short otherwise.\"\"\"\n",
    "\n",
    "    def __init__(self):  # Initiation\n",
    "        \"\"\"Cast the configured periods and size to int and create both EMA indicators.\"\"\"\n",
    "        super(MyStrategy, self).__init__()\n",
    "        # The hyperparameter file stores every value as a string, so convert once here.\n",
    "        self.config[\"fast_period\"]=int(self.config[\"fast_period\"])\n",
    "        self.config[\"slow_period\"]=int(self.config[\"slow_period\"])\n",
    "        self.config[\"size\"]=int(self.config[\"size\"])\n",
    "        print(self.config)\n",
    "        self.emaFast = bt.ind.ExponentialMovingAverage(period=self.config[\"fast_period\"])\n",
    "        self.emaSlow = bt.ind.ExponentialMovingAverage(period=self.config[\"slow_period\"])\n",
    "        self.size = self.config[\"size\"]\n",
    "\n",
    "    @staticmethod\n",
    "    def init_broker(broker):\n",
    "        \"\"\"Set the starting cash to 100000 and the commission to zero on `broker`.\"\"\"\n",
    "        broker.setcash(100000.0)\n",
    "        broker.setcommission(commission=0.0)\n",
    "\n",
    "    @staticmethod\n",
    "    def add_data(cerebro):\n",
    "        \"\"\"Attach the daily CSV at MyStrategy.TRAIN_FILE to `cerebro` as a data feed.\"\"\"\n",
    "        # Column indices follow the CSV layout: dt, open, high, low, close, vol.\n",
    "        data = btfeeds.GenericCSVData(\n",
    "            dataname=MyStrategy.TRAIN_FILE,\n",
    "            dtformat=('%Y-%m-%d'),\n",
    "            timeframe=bt.TimeFrame.Days,\n",
    "            datetime=0,\n",
    "            time=-1,\n",
    "            high=2,\n",
    "            low=3,\n",
    "            open=1,\n",
    "            close=4,\n",
    "            volume=5,\n",
    "            openinterest=-1\n",
    "        )\n",
    "        cerebro.adddata(data)\n",
    "\n",
    "    def next(self):  # Processing\n",
    "        \"\"\"Open a position when flat; reverse it when the EMAs cross against it.\"\"\"\n",
    "        super(MyStrategy, self).next()\n",
    "        if not self.position:\n",
    "            if self.emaFast[0] > self.emaSlow[0]:\n",
    "                self.buy(size=self.size)  # Go long\n",
    "            else:\n",
    "                self.sell(size=self.size)  # Go short\n",
    "        # Reversals trade twice the size: one lot closes the open position, one opens the opposite.\n",
    "        elif self.position.size>0 and self.emaFast[0] < self.emaSlow[0]:\n",
    "            self.sell(size=2*self.size)  # Go short\n",
    "        elif self.position.size<0 and self.emaFast[0] > self.emaSlow[0]:\n",
    "            self.buy(size=2*self.size)  # Go long"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 4) Backtest Locally"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%run /opt/program/train"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 5) Backtest Remotely with SageMaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp \"/opt/program/\" \"s3://{s3bucket}/{algo_name}/\" --recursive --exclude \"*\" --include \"{algo_name}*.*\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imported here so the cell does not depend on names leaked into the kernel by an earlier %run.\n",
    "import json\n",
    "\n",
    "conf_file='/opt/ml/input/config/hyperparameters.json'\n",
    "with open(conf_file, 'r') as f:\n",
    "    config = json.load(f)\n",
    "# `s3bucket` is the bucket name resolved in Step 1.\n",
    "config['s3']=s3bucket\n",
    "config['chart']='true'\n",
    "print(config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#Run Remote Backtest via SageMaker\n",
    "import sagemaker as sage\n",
    "from sagemaker import get_execution_role\n",
    "from sagemaker.estimator import Estimator\n",
    "\n",
    "role = get_execution_role()\n",
    "sess = sage.Session()\n",
    "\n",
    "WORK_DIRECTORY = '/opt/ml/input/data/training'\n",
    "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n",
    "print(data_location)\n",
    "\n",
    "prefix=algo_name\n",
    "# Underscores are replaced with hyphens to form the base job name.\n",
    "job_name=prefix.replace('_','-')\n",
    "\n",
    "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n",
    "region = sess.boto_session.region_name\n",
    "image = f'{account}.dkr.ecr.{region}.amazonaws.com/algotrading:1.0'\n",
    "\n",
    "algo = Estimator(\n",
    "    image_uri=image,\n",
    "    role=role,\n",
    "    instance_count=1,\n",
    "    instance_type='ml.m4.xlarge',\n",
    "    output_path=\"s3://{}/output\".format(sess.default_bucket()),\n",
    "    sagemaker_session=sess,\n",
    "    base_job_name=job_name,\n",
    "    hyperparameters=config,\n",
    "    metric_definitions=[\n",
    "        {\n",
    "            \"Name\": \"algo:pnl\",\n",
    "            \"Regex\": \"Total PnL:(.*?)]\"\n",
    "        },\n",
    "        {\n",
    "            \"Name\": \"algo:sharpe_ratio\",\n",
    "            \"Regex\": \"Sharpe Ratio:(.*?),\"\n",
    "        }\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "algo.fit(data_location)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Get Algo Metrics\n",
    "from sagemaker.analytics import TrainingJobAnalytics\n",
    "\n",
    "latest_job_name = algo.latest_training_job.job_name\n",
    "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n",
    "metrics_dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Get Algo Chart from S3\n",
    "# Strip the bucket prefix from the artifact URI to obtain the object key.\n",
    "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n",
    "import boto3\n",
    "s3 = boto3.resource('s3')\n",
    "my_bucket = s3.Bucket(sess.default_bucket())\n",
    "my_bucket.download_file(model_name,'model.tar.gz')\n",
    "!tar -xzf model.tar.gz\n",
    "!rm model.tar.gz\n",
    "from IPython.display import Image\n",
    "Image(filename='chart.png')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 6) Run Hyperparameter Optimization with SageMaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.tuner import (\n",
    "    IntegerParameter,\n",
    "    CategoricalParameter,\n",
    "    ContinuousParameter,\n",
    "    HyperparameterTuner,\n",
    ")\n",
    "\n",
    "# Only the two EMA periods are tuned; all other hyperparameters come from `algo`.\n",
    "hyperparameter_ranges = {\n",
    "    \"fast_period\": IntegerParameter(5, 10),\n",
    "    \"slow_period\": IntegerParameter(21, 31)\n",
    "}\n",
    "objective_metric_name= \"algo:pnl\"\n",
    "tuner = HyperparameterTuner(algo,\n",
    "                            objective_metric_name,\n",
    "                            hyperparameter_ranges,\n",
    "                            max_jobs=6,\n",
    "                            max_parallel_jobs=3,\n",
    "                            metric_definitions=[\n",
    "                                {\n",
    "                                    \"Name\": \"algo:pnl\",\n",
    "                                    \"Regex\": \"Total PnL:(.*?)]\"\n",
    "                                }\n",
    "                            ]\n",
    "                           )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tuner.fit(data_location)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "best_params=boto3.client('sagemaker').describe_hyper_parameter_tuning_job(\n",
    "    HyperParameterTuningJobName=tuner.latest_tuning_job.job_name)['BestTrainingJob']['TunedHyperParameters']\n",
    "best_params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.analytics import TrainingJobAnalytics\n",
    "bestjob=tuner.best_training_job()\n",
    "metrics_dataframe = TrainingJobAnalytics(training_job_name=bestjob).dataframe()\n",
    "metrics_dataframe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 7) Backtest Locally with Optimal Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use optimal hyperparameter and test data\n",
    "import json\n",
    "\n",
    "conf_file='/opt/ml/input/config/hyperparameters.json'\n",
    "with open(conf_file, 'r') as f:\n",
    "    config = json.load(f)\n",
    "config['fast_period']=best_params['fast_period']\n",
    "config['slow_period']=best_params['slow_period']\n",
    "config['chart']='false'\n",
    "# Save the tuned values back to disk: the %run in the next cell starts the script in a\n",
    "# fresh namespace and cannot see `config`, so an in-memory change alone has no effect.\n",
    "# NOTE(review): assumes /opt/program/train reads this file - confirm.\n",
    "with open(conf_file, 'w') as f:\n",
    "    json.dump(config, f)\n",
    "print(config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run /opt/program/train"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Congratulations! You've completed this strategy."
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}