{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%run init_model.py 'algo_simple_sma'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 1) Data Preparation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# get S3 bucket\n", "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", "s3bucket=s3bucket[0]\n", "s3bucket" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sys\n", "!{sys.executable} -m pip install PyAthena" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sagemaker as sage\n", "from sagemaker import get_execution_role\n", "import datetime\n", "from sagemaker.tensorflow import TensorFlow\n", "import json\n", "\n", "role = get_execution_role()\n", "sess = sage.Session()\n", "region = sess.boto_session.region_name" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from pyathena import connect\n", "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',\n", " region_name=region)\n", "\n", "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", "del df['dt']\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "trainCount=int(len(df)*0.4)\n", "dfTrain = df.iloc[:trainCount]\n", "\n", "dfTest = df.iloc[trainCount:]\n", "dfTest.to_csv('local/'+algo_name+'/input/data/training/data.csv')\n", "dfTest.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "%matplotlib notebook\n", "dfTest[\"close\"].plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 2) Modify Strategy Configuration \n", 
"\n", "In the following cell, you can adjust the parameters for the strategy.\n", "\n", "* `user` = Name for Leaderboard (optional)\n", "* `fast_period` = Fast Period for Moving Average Indicator in days (e.g. 50)\n", "* `slow_period` = Slow Period for Moving Average Indicator in days (e.g. 200)\n", "* `size` = The number of shares for a transaction\n", "\n", "`Tip`: A good starting point for improving the strategy is to reduce the number of trades that get triggered by increasing the slow and fast period. Longer periods tend to perform better. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile local/{algo_name}/input/config/hyperparameters.json\n", "{ \"user\" : \"user\",\n", " \"fast_period\" : \"50\",\n", " \"slow_period\" : \"200\",\n", " \"size\" : \"100\"\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%run update_config.py $algo_name" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 3) Modify Strategy Code\n", "\n", "In the following cell, you can modify the strategy code. 
For the first backtests, you can leave it as is.\n", "\n", "`Tip`: A good starting point for improving the strategy is to try different indicators like ExponentialMovingAverage or delay when trades are triggered and check crossover again before placing a trade.\n", "\n", "Here are some helpful links:\n", "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile model/{algo_name}.py\n", "import backtrader as bt\n", "from algo_base import *\n", "\n", "class MyStrategy(StrategyTemplate):\n", "\n", " def __init__(self): # Initiation\n", " super(MyStrategy, self).__init__()\n", " self.config[\"fast_period\"]=int(self.config[\"fast_period\"])\n", " self.config[\"slow_period\"]=int(self.config[\"slow_period\"])\n", " self.config[\"size\"]=int(self.config[\"size\"])\n", "\n", " self.smaFast = bt.ind.SimpleMovingAverage(period=self.config[\"fast_period\"])\n", " self.smaSlow = bt.ind.SimpleMovingAverage(period=self.config[\"slow_period\"])\n", " self.size = self.config[\"size\"]\n", "\n", " def init_broker(broker):\n", " broker.setcash(100000.0)\n", " broker.setcommission(commission=0.0) \n", " \n", " def add_data(cerebro):\n", " data = btfeeds.GenericCSVData(\n", " dataname=MyStrategy.TRAIN_FILE,\n", " dtformat=('%Y-%m-%d'),\n", " timeframe=bt.TimeFrame.Days,\n", " datetime=0,\n", " time=-1,\n", " high=2,\n", " low=3,\n", " open=1,\n", " close=4,\n", " volume=5,\n", " openinterest=-1\n", " )\n", " cerebro.adddata(data)\n", "\n", " def next(self): # Processing\n", " super(MyStrategy, self).next()\n", " dt=self.datas[0].datetime.datetime(0)\n", " if not self.position:\n", " if self.smaFast[0] > self.smaSlow[0]:\n", " self.buy(size=self.size) # Go long\n", " else:\n", " 
self.sell(size=self.size) # Go short\n", " elif self.position.size>0 and self.smaFast[0] < self.smaSlow[0]:\n", " self.sell(size=2*self.size) # Go short\n", " elif self.position.size<0 and self.smaFast[0] > self.smaSlow[0]: \n", " self.buy(size=2*self.size) # Go long" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 4) Backtest Locally\n", "\n", "**Please note that the initial docker build may take a few minutes. Subsequent runs are fast.**" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Build Local Algo Image\n", "!docker build -t $algo_name .\n", "!docker run -v $(pwd)/local/$algo_name:/opt/ml --rm $algo_name train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython.display import Image\n", "Image(filename='local/'+algo_name+'/model/chart.png')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Refine your trading strategy (step 2 to 4). Once you are ready, move on to the next step." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Step 5) Backtest on SageMaker and submit performance" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Deploy Algo Image to ECR\n", "!./build_and_push.sh $algo_name" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Run Remote test via SageMaker\n", "import sagemaker as sage\n", "from sagemaker import get_execution_role\n", "from sagemaker.estimator import Estimator \n", "\n", "role = get_execution_role()\n", "sess = sage.Session()\n", "\n", "WORK_DIRECTORY = 'local/'+algo_name+'/input/data/training'\n", "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", "print(data_location)\n", "\n", "conf_file='local/'+algo_name+'/input/config/hyperparameters.json'\n", "with open(conf_file, 'r') as f:\n", " config = json.load(f)\n", "#config['sim_data']='True'\n", "print(config)\n", "\n", "prefix=algo_name\n", "job_name=prefix.replace('_','-')\n", "\n", "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", "region = sess.boto_session.region_name\n", "image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'\n", "\n", "algo = sage.estimator.Estimator(\n", " image_uri=image,\n", " role=role,\n", " instance_count=1,\n", " instance_type='ml.m4.xlarge',\n", " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", " sagemaker_session=sess,\n", " base_job_name=job_name,\n", " hyperparameters=config,\n", " metric_definitions=[\n", " {\n", " \"Name\": \"algo:pnl\",\n", " \"Regex\": \"Total PnL:(.*?)]\"\n", " },\n", " {\n", " \"Name\": \"algo:sharpe_ratio\",\n", " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", " }\n", " ])\n", "algo.fit(data_location)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sagemaker.analytics import TrainingJobAnalytics\n", "\n", "latest_job_name = algo.latest_training_job.job_name\n", 
"metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", "metrics_dataframe" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#Get Algo Chart from S3\n", "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", "import boto3\n", "s3 = boto3.resource('s3')\n", "my_bucket = s3.Bucket(sess.default_bucket())\n", "my_bucket.download_file(model_name,'model.tar.gz')\n", "!tar -xzf model.tar.gz\n", "!rm model.tar.gz\n", "from IPython.display import Image\n", "Image(filename='chart.png') " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Congratulations! You've completed this strategy.