In [None]:
# Run the shared setup script for this strategy. Later cells use `algo_name`,
# which is presumably defined by init_model.py from this argument -- TODO confirm.
%run init_model.py 'algo_ml_long_short_predict'

# Step 1) Data Preparation

In [None]:
# get S3 bucket
s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')
s3bucket=s3bucket[0]
s3bucket

In [None]:
import sys
# Call pip through the interpreter that runs this kernel (sys.executable) so that
# PyAthena is installed into the same environment the notebook imports from.
!{sys.executable} -m pip install PyAthena

In [None]:
import os
import sagemaker as sage
from sagemaker import get_execution_role
import datetime
from sagemaker.tensorflow import TensorFlow
import json

# Execution role and session from the SageMaker SDK; both are reused by later cells.
role = get_execution_role()
sess = sage.Session()
# Region of the session's underlying boto session; passed to the Athena connection below.
region = sess.boto_session.region_name

In [None]:
import pandas as pd
from pyathena import connect
# Athena connection; query results are staged under the bucket's results/ prefix.
conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',
               region_name=region)

df = pd.read_sql("SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;", conn)
# The query has no ORDER BY, so the row order is not guaranteed. Index by date and
# sort chronologically, because the split in the next cell slices rows by position.
df = (
    df.set_index(pd.DatetimeIndex(df['dt']))
      .drop(columns='dt')
      .sort_index()
)
df.head()

In [None]:
# Positional split: the first 40% of the rows go to dfTrain, the remainder to dfTest.
trainCount=int(len(df)*0.4)
dfTrain = df.iloc[:trainCount]

# Only dfTest is written out. It lands in the strategy's input/data/training folder,
# presumably so the backtest runs on data the model was not trained on -- TODO confirm.
dfTest = df.iloc[trainCount:]
dfTest.to_csv('local/'+algo_name+'/input/data/training/data.csv')
dfTest.head()

In [None]:
# NOTE(review): the `notebook` backend presumably requires the classic Notebook
# front end -- confirm it renders in the target environment.
%matplotlib notebook
# Plot the close prices of the data set written out for the backtest.
dfTest["close"].plot()

# Step 2) Modify Strategy Configuration 

In the following cell, you can adjust the parameters for the strategy.

* `user` = Name for Leaderboard (optional)
* `long_threshold` = Threshold for Long Trade (0 to 1)
* `short_threshold` = Threshold for Short Trade (0 to 1)
* `profit_target_pct` = Profit Target Percentage 
* `stop_target_pct` = Stop Target Percentage
* `size` = The number of shares for a transaction

`Tip`: A good starting point for improving the strategy is to modify the profit / stop target and the risk/reward ratio. Another option is to reduce the number of signals by increasing the threshold.

In [None]:
%%writefile local/{algo_name}/input/config/hyperparameters.json
{ "user" : "user",
 "long_threshold" : "0.5",
 "short_threshold" : "0.5",
 "profit_target_pct" : "2.00",
 "stop_target_pct" : "1.50",
 "size" : "100"
}

In [None]:
# Run update_config.py for this strategy; presumably it propagates the
# hyperparameters written by the previous cell -- TODO confirm.
%run update_config.py $algo_name

# Step 3) Modify Strategy Code

In the following cell, you can modify the strategy code. For the first backtests, you can leave it as is.

`Tip`: A good starting point for improving the strategy is to combine the signal from the model with traditional trend indicators (e.g. moving average). This will likely improve the performance. To improve the strategy further, you could increase the accuracy of the machine learning model by including more indicators (e.g. ATR) or by modifying the input and forecast window. This requires re-training the machine learning model, because the model must match the inputs your strategy feeds it. For time-series forecasting, you could compare the performance with more advanced ML networks (e.g. CNN, LSTM, RNN) and pick the model with the best predictions.

You can also check out other AWS-provided options for time-series forecasting and formulate a strategy that integrates their price predictions:
* https://docs.aws.amazon.com/sagemaker/latest/dg/deepar.html 
* https://aws.amazon.com/forecast/

Here are some helpful links:
* Backtrader Documentation: https://www.backtrader.com/docu/strategy/
* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/
* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/

In [None]:
%%writefile model/{algo_name}.py
import backtrader as bt
from algo_base import *
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import load_model

class MyStrategy(StrategyTemplate):
    """Long/short strategy that trades on the output of a pre-trained Keras model.

    For every bar a feature vector is built from the current close, `repeatCount`
    simple moving averages and `repeatCount` rate-of-change values. The model
    returns two scores per sample (index 0 = long, index 1 = short). While flat,
    a score above its configured threshold opens a position; an open position is
    closed once the close price reaches the profit target or the stop level.
    """

    def __init__(self):
        """Parse the configuration, load the model and create the indicators."""
        super(MyStrategy, self).__init__()
        # self.config is provided by StrategyTemplate (not shown here). The values are
        # cast because they presumably arrive as strings -- TODO confirm.
        self.config["long_threshold"]=float(self.config["long_threshold"])
        self.config["short_threshold"]=float(self.config["short_threshold"])
        self.config["size"]=int(self.config["size"])
        self.config["profit_target_pct"]=float(self.config["profit_target_pct"])
        self.config["stop_target_pct"]=float(self.config["stop_target_pct"])

        self.order=None
        self.orderPlaced=False

        self.model = load_model('model_long_short_predict.h5')

        # input / indicators
        self.repeatCount=15
        self.repeatStep=1

        # Percentages from the config converted to fractions of the entry price.
        self.profitTarget=self.config["profit_target_pct"]/100.0
        self.stopTarget=self.config["stop_target_pct"]/100.0
        self.size=self.config["size"]

        # Exit levels of the open trade; set whenever a position is opened in next().
        self.limitPrice=None
        self.stopPrice=None

        self.sma=[]
        self.roc=[]

        # Column names in feature order. Not read inside this class; kept in case
        # the base class or tooling uses it -- TODO confirm.
        self.hData=["dt"]
        self.hData.append("close")
        # One SMA and one ROC per period; with the defaults the periods are 2..16.
        for a in range(0,self.repeatCount):
            tp=(a+1)*self.repeatStep+1
            self.hData.append("sma"+str(tp))
            self.sma.append(bt.talib.SMA(self.data, timeperiod=tp, plot=False))
        for a in range(0,self.repeatCount):
            tp=(a+1)*self.repeatStep+1
            self.hData.append("roc"+str(tp))
            self.roc.append(bt.talib.ROC(self.data, timeperiod=tp, plot=False))

    # Declared static because the hook takes no `self`; it therefore works whether
    # it is invoked on the class or on an instance.
    @staticmethod
    def init_broker(broker):
        """Configure the broker with 100,000 starting cash and zero commission."""
        broker.setcash(100000.0)
        broker.setcommission(commission=0.0)

    @staticmethod
    def add_data(cerebro):
        """Add the daily CSV at MyStrategy.TRAIN_FILE to `cerebro` as the data feed.

        Column layout: 0=date (%Y-%m-%d), 1=open, 2=high, 3=low, 4=close, 5=volume.
        """
        data = btfeeds.GenericCSVData(
            dataname=MyStrategy.TRAIN_FILE,
            dtformat=('%Y-%m-%d'),
            timeframe=bt.TimeFrame.Days,
            datetime=0,
            time=-1,
            high=2,
            low=3,
            open=1,
            close=4,
            volume=5,
            openinterest=-1
        )
        cerebro.adddata(data)

    def next(self):
        """Build the features for the current bar, query the model and trade on it."""
        super(MyStrategy, self).next()

        # self.dataclose is expected to be set up by StrategyTemplate -- TODO confirm.
        cl=self.dataclose[0]

        # Price features: the current close followed by the SMAs. While an SMA is
        # still NaN (warm-up period) the close is used in its place.
        inputRec0=[cl]
        for a in range(0,self.repeatCount):
            smaValue=self.sma[a][0]
            inputRec0.append(cl if math.isnan(smaValue) else smaValue)

        # Min-max scale the price features to [0, 1]; all zeros if they are identical.
        m1=min(inputRec0)
        m2=max(inputRec0)
        span=m2-m1
        if span==0:
            inputRec=[0]*len(inputRec0)
        else:
            inputRec=[(a-m1)/span for a in inputRec0]

        # Rate-of-change features are appended unscaled; NaN is replaced by 0.
        for a in range(0,self.repeatCount):
            rocValue=self.roc[a][0]
            inputRec.append(0 if math.isnan(rocValue) else rocValue)

        # Batch of one sample with 1 + 2*repeatCount features.
        dataX=np.array([np.array(inputRec)])

        # *** ML prediction ***
        mY=self.model.predict(dataX)
        tLong=mY[0][0]
        tShort=mY[0][1]

        if not self.position:
            # Flat: open a position if a score exceeds its threshold (long wins a tie).
            fLong=(tLong>self.config["long_threshold"])
            fShort=(tShort>self.config["short_threshold"])
            if fLong:
                self.order=self.buy(size=self.size)
                self.limitPrice=cl+self.profitTarget*cl
                self.stopPrice=cl-self.stopTarget*cl
            elif fShort:
                self.order=self.sell(size=self.size)
                self.limitPrice=cl-self.profitTarget*cl
                self.stopPrice=cl+self.stopTarget*cl

        if self.position:
            # In a trade: exit with an opposite order of the same size once the close
            # reaches the profit target or the stop level.
            if self.position.size>0:
                if cl>=self.limitPrice or cl<=self.stopPrice:
                    self.order=self.sell(size=self.size)
            elif self.position.size<0:
                if cl<=self.limitPrice or cl>=self.stopPrice:
                    self.order=self.buy(size=self.size)

# Step 4) Backtest Locally

**Please note that the initial docker build may take a few minutes. Subsequent runs are fast.**

In [None]:
#Build Local Algo Image
!docker build -t $algo_name .
# Run the image's `train` command with the local strategy folder mounted at /opt/ml;
# --rm removes the container when it exits.
!docker run -v $(pwd)/local/$algo_name:/opt/ml --rm $algo_name train

In [None]:
from IPython.display import Image
# Display the chart image from the local model folder (presumably written by the
# backtest run in the previous cell -- TODO confirm).
Image(filename='local/'+algo_name+'/model/chart.png')

## Refine your trading strategy (steps 2 to 4). Once you are ready, move on to the next step.

# Step 5) Backtest on SageMaker and submit performance

In [None]:
#Deploy Algo Image to ECR (the estimator cell below references the image by its ECR URI)
!./build_and_push.sh $algo_name

In [None]:
# Run the strategy remotely as a SageMaker training job
import sagemaker as sage
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator

role = get_execution_role()
sess = sage.Session()

# Upload the local input data folder to S3 so the remote job can read it.
WORK_DIRECTORY = 'local/'+algo_name+'/input/data/training'
data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')
print(data_location)

# Reuse the local hyperparameter file as the job's hyperparameters.
conf_file='local/'+algo_name+'/input/config/hyperparameters.json'
with open(conf_file, 'r') as f:
    config = json.load(f)
#config['sim_data']='True'
print(config)

# The job name is the algo name with underscores replaced by hyphens.
prefix=algo_name
job_name=prefix.replace('_','-')

# URI of the strategy image in this account's ECR registry.
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'

# The metric regexes extract PnL and Sharpe ratio from the job's log output.
algo = Estimator(
    image_uri=image,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path="s3://{}/output".format(sess.default_bucket()),
    sagemaker_session=sess,
    base_job_name=job_name,
    hyperparameters=config,
    metric_definitions=[
        {"Name": "algo:pnl", "Regex": "Total PnL:(.*?)]"},
        {"Name": "algo:sharpe_ratio", "Regex": "Sharpe Ratio:(.*?),"},
    ],
)
algo.fit(data_location)

In [None]:
#Get Algo Metrics
from sagemaker.analytics import TrainingJobAnalytics

# Load the metrics of the most recent training job started through `algo`
# into a DataFrame and display it.
latest_job_name = algo.latest_training_job.job_name
metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()
metrics_dataframe

In [None]:
#Get Algo Chart from S3
# Strip the "s3://<default bucket>/" prefix from the artifact URI to get the object key.
model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')
import boto3
s3 = boto3.resource('s3')
my_bucket = s3.Bucket(sess.default_bucket())
my_bucket.download_file(model_name,'model.tar.gz')
# Unpack the archive into the current directory, then delete it.
!tar -xzf model.tar.gz
!rm model.tar.gz
from IPython.display import Image
# chart.png is expected to be among the extracted files.
Image(filename='chart.png') 

### Congratulations! You've completed this strategy.