# Train Forecast Model - Daily

In this notebook we'll train a deep learning model that learns, from historical price data, whether a long or short trade would hit its profit target without hitting its stop loss within the next five days.

Model:
* Multilayer Perceptron (MLP) (Feedforward neural network)
* 3 layers: input, hidden, output
* Binary Classification
* `Input`: Close, SMA(2 to 16), ROC(2 to 16)
* `Output`: Does a long or short trade hit the profit target (2%) without hitting a stop loss (1.5%) in the next five days?

In [None]:
# Run the shared init script, passing the model name as its argument.
# NOTE(review): later cells use `model_name`; presumably this script defines it — confirm.
%run ../2_Strategies/init_model.py 'model_long_short_predict'

In [None]:
%%writefile local/{model_name}/input/config/hyperparameters.json
{ 
}

# Step 1) Get Data from Athena and S3

In [None]:
# get S3 bucket
s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')
s3bucket=s3bucket[0]
s3bucket

In [None]:
import sys
!{sys.executable} -m pip install PyAthena

In [None]:
import os
import sagemaker as sage
from sagemaker import get_execution_role
import datetime
from sagemaker.tensorflow import TensorFlow
import json

# Execution role returned by get_execution_role(); passed to the SageMaker Estimator later on.
role = get_execution_role()
# SageMaker session; its boto session supplies the region name.
sess = sage.Session()
# Region name, used for the Athena connection.
region = sess.boto_session.region_name

In [None]:
import pandas as pd
from pyathena import connect
# Connect to Athena; query results are staged under s3://<bucket>/results/.
conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',
               region_name=region)

# Load the full daily history and index it by date.
df = pd.read_sql("SELECT * FROM algo_data.hist_data_daily;", conn)
df = df.set_index(pd.DatetimeIndex(df['dt'])).drop(columns=['dt'])
# A SELECT without ORDER BY gives no row-order guarantee, while the indicator
# and label computation (and the later train/test split) work on row order.
# Sort chronologically; mergesort is stable, so rows sharing a date keep
# their relative order.
df = df.sort_index(kind='mergesort')
df.head()

In [None]:
# Write the raw history into the model's local training-data folder,
# then report the row count and preview the frame.
data_orig_path = 'local/'+model_name+'/input/data/training/data_orig.csv'
df.to_csv(data_orig_path)
print("count=%s" % len(df))
df.head()

In [None]:
%matplotlib notebook
df["close"].plot()

# Step 2) Run Data Preparation Locally

## Modify Data Preparation Code

In the following cell, you can modify the data preparation code or leave it as is.

In [None]:
%%writefile model/{model_name}_prep.py
#!/usr/bin/env python

import numpy as np
import pandas as pd
import talib as ta
from talib.abstract import *
import math

# Input/output locations, all below /opt/ml/.
prefix = '/opt/ml/'
input_path = prefix + 'input/data/training'

data_orig_file = input_path+'/data_orig.csv'
data_file = input_path+'/data.csv'

# Raw price history indexed by date. `infer_datetime_format` is deprecated
# since pandas 2.0 (strict format inference is now the default), so it is
# no longer passed.
d = pd.read_csv(data_orig_file, parse_dates=['dt'], index_col=['dt'])
print(d.head())

repeatCount=15                     # number of SMA and of ROC indicators per row
repeatStep=1                       # spacing between successive indicator periods
lookBack=repeatCount*repeatStep
forwardWindow=5                    # look-ahead length for the labels (five days per the notebook text)

profitTarget=2.0/100.0             # 2% expressed as a fraction
stopTarget=1.5/100.0               # 1.5% expressed as a fraction

iCount=lookBack

# Indicator periods, computed once so that the column names and the computed
# indicators can never disagree. With repeatStep=1 this yields 2..16, exactly
# the names used before; for other steps the names now match the real periods.
periods=[(a+1)*repeatStep+1 for a in range(0,repeatCount)]

# header
hData=["dt"]
hData.append("close")
for p in periods:
    hData.append("sma"+str(p))
for p in periods:
    hData.append("roc"+str(p))
hData.append("long")
hData.append("short")

# data
tData=[]

# talib's abstract API takes a dict of named price arrays.
inputs = {
    'close': np.array(d["close"])
}
# One indicator array per period; position n corresponds to row n of `d`.
sma=[]
for p in periods:
    sma.append(SMA(inputs,timeperiod=p))
roc=[]
for p in periods:
    roc.append(ROC(inputs,timeperiod=p))

closeList=d["close"]
dLen=len(d)
# Counters: rows labelled long / short, plus the running row index n.
lCount=0
sCount=0
nCount=0
n=0
for idx,row in d.iterrows():
 if n=cl+cl*profitTarget and low>=cl-cl*stopTarget:
 long=1
 lCount=lCount+1
 inputRec.append(long)
 
 #short
 short=0
 if low<=cl-cl*profitTarget and high<=cl+cl*stopTarget:
 short=1
 sCount=sCount+1
 inputRec.append(short)

 tData.append(inputRec)
 n=n+1
 
print("lCount=%s,sCount=%s" % (lCount,sCount))

# Assemble the prepared rows into a frame indexed by date: the 'dt' column
# becomes the DatetimeIndex and is removed from the regular columns.
df1=pd.DataFrame(tData,columns=hData)
df1=df1.set_index(pd.DatetimeIndex(df1['dt'])).drop(columns=['dt'])

# Write the prepared data next to the input file and show a short summary.
df1.to_csv(data_file)
print(df1.head())
print("count=%s" % (len(df1)))

## Run Data Preparation Locally in a Docker Container

In [None]:
!cp model/{model_name}_prep.py model/train
!chmod 777 model/train
!docker build -t {model_name}_prep .
!docker run -v $(pwd)/local/$model_name:/opt/ml --rm {model_name}_prep train

## Create Training and Test Data

In [None]:
# Share of the leading rows used for training; the remainder is the test set.
TRAIN_FRACTION = 0.4
data_dir = "local/"+model_name+"/input/data/training"

# Load the prepared data. `infer_datetime_format` is deprecated since
# pandas 2.0 (strict format inference is the default), so it is omitted.
df = pd.read_csv(data_dir+"/data.csv", parse_dates=['dt'], index_col=['dt'])
print("totalCount=%s" % len(df))

# Split by position without shuffling: first rows train, later rows test.
trainCount=int(len(df)*TRAIN_FRACTION)
dfTrain = df.iloc[:trainCount]
dfTrain.to_csv(data_dir+"/data_train.csv")
print("trainCount=%s" % len(dfTrain))

dfTest = df.iloc[trainCount:]
dfTest.to_csv(data_dir+"/data_test.csv")
print("testCount=%s" % len(dfTest))
dfTest.head()

# Step 3) Train the Model

In the following cell, you can modify the model training code or leave it as is.

In [None]:
%%writefile model/{model_name}.py
#!/usr/bin/env python
from __future__ import print_function

import os
import sys
import traceback
import math
import numpy as np
import pandas as pd
import tensorflow as tf

from keras.layers import Dropout, Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasRegressor

yLen=2
b=0

# Optional
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# These are the paths to where SageMaker mounts interesting things in your
# container.
prefix = '/opt/ml/'

input_path = prefix + 'input/data/training/data_train.csv'
test_path = prefix + 'input/data/training/data_test.csv'

output_path = os.path.join(prefix, 'output')
model_path = os.path.join(prefix, 'model')

# Process and prepare the data
def data_process(df):
    """Split a data frame into a feature matrix and a label matrix.

    Column 0 is skipped, the last ``yLen`` columns are the labels, and every
    column in between is a feature. The slicing is done on whole columns
    instead of row by row, so an empty frame still yields 2-D arrays and a
    correct feature count.

    Args:
        df: pandas DataFrame laid out as described above; the feature and
            label columns must be convertible to float32.

    Returns:
        Tuple ``(dataX, dataY, b)``: float32 arrays of shape
        ``(rows, features)`` and ``(rows, yLen)``, plus the feature count.

    Side effects:
        Stores the feature count in the module-level ``b``.
    """
    global yLen
    global b
    # Position of the first label column; same arithmetic as len(row)-yLen.
    labelStart = df.shape[1] - yLen
    dataX = df.iloc[:, 1:labelStart].values.astype(np.float32)
    dataY = df.iloc[:, labelStart:].values.astype(np.float32)
    b = dataX.shape[1]
    return dataX, dataY, b

def build_classifier():
    """Build and compile the feed-forward network.

    Reads the module-level ``b`` (width of the input and first dense layer)
    and ``yLen`` (number of sigmoid outputs). The stack is:
    Dense(b) -> Dropout(0.2) -> Dense(int(b/2)) -> Dropout(0.2) -> Dense(yLen).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    global b
    global yLen
    print("build_classifier:b=%s,yLen=%s" % (b,yLen))
    hiddenUnits = int(b/2)
    layers = [
        Dense(b, input_dim=b, kernel_initializer='normal', activation='relu'),
        Dropout(0.2),
        Dense(hiddenUnits, kernel_initializer='normal', activation='relu'),
        Dropout(0.2),
        Dense(yLen,kernel_initializer='normal', activation='sigmoid'),
    ]
    network = Sequential()
    for layer in layers:
        network.add(layer)
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

def generate_model(dataX, dataY, b, epochs=100, batch_size=1):
    """Build the classifier, fit it, and print its score on the training data.

    Args:
        dataX: training feature matrix.
        dataY: training label matrix.
        b: feature count. Kept for backward compatibility; it is not used
            here because build_classifier() sizes the network from the
            module-level ``b``.
        epochs: number of training epochs (default 100, the previous
            hard-coded value).
        batch_size: samples per gradient update (default 1, the previous
            hard-coded value).

    Returns:
        The fitted model.
    """
    model=build_classifier()
    model.fit(dataX, dataY, epochs=epochs, batch_size=batch_size)
    # Score on the same data used for fitting; index 1 is the first metric after the loss.
    scores = model.evaluate(dataX, dataY, verbose=0)
    print("Training Data %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    return model
 
def train():
    """Train on the training CSV, save the model, then score the test CSV.

    On any exception the message and traceback are written to
    ``<output_path>/failure`` and to stderr, and the process exits with
    status 255.
    """
    print('Starting the training.')
    try:
        raw_data = pd.read_csv(input_path)
        X, y, b = data_process(raw_data)
        model = generate_model(X, y, b)
        # Make sure the target directory exists so the save cannot fail on a
        # missing folder.
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        model.save(os.path.join(model_path, 'model.h5'))

        print('Training is complete. Model saved.')

        raw_data = pd.read_csv(test_path)
        testX, testY, b = data_process(raw_data)
        scores = model.evaluate(testX, testY, verbose=0)
        print("Test Data %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

    except Exception as e:
        trc = traceback.format_exc()
        message = 'Exception during training: ' + str(e) + '\n' + trc
        # Write out an error file. This will be returned as the failure
        # Reason in the DescribeTrainingJob result.
        # Guarded so that a problem writing the file (for example a missing
        # output directory) cannot mask the original training error.
        try:
            if not os.path.isdir(output_path):
                os.makedirs(output_path)
            with open(os.path.join(output_path, 'failure'), 'w') as s:
                s.write(message)
        except (IOError, OSError) as write_error:
            print('Could not write failure file: ' + str(write_error), file=sys.stderr)
        # Printing this causes the exception to be in the training job logs
        print(message, file=sys.stderr)
        # A non-zero exit code causes the training job to be marked as Failed.
        sys.exit(255)

if __name__ == '__main__':
    train()

    # A zero exit code causes the job to be marked a Succeeded.
    sys.exit(0)

### Option 1: Train Locally

You can choose whether to train locally (Option 1) or remotely via SageMaker (Option 2).

In [None]:
# Build Local ML Image
!cp model/{model_name}.py model/train
!chmod 777 model/train
!docker build -t {model_name} .
!docker run -v $(pwd)/local/$model_name:/opt/ml --rm {model_name} train

#### Copy Model Artifact to Strategies Folder

In [None]:
# Show the model file produced by the local training run.
!ls -la local/{model_name}/model/model.h5
# Copy it into the strategies folder, named after the model.
!cp local/{model_name}/model/model.h5 ../2_Strategies/model/{model_name}.h5
# List the model files now present in the strategies folder.
!ls -la ../2_Strategies/model/model_*.h5

### Option 2: Remote Training via SageMaker

You can choose whether to train locally (Option 1) or remotely via SageMaker (Option 2).

In [None]:
# Build the ML image and push it to the container registry.
# NOTE(review): this comment previously said "ECS"; the training cell pulls the image from an ECR URI, so ECR is presumably meant — confirm against build_and_push.sh.
!./build_and_push.sh $model_name

In [None]:
import os
import sagemaker as sage
from sagemaker import get_execution_role
import datetime
from sagemaker.tensorflow import TensorFlow
import json

role = get_execution_role()
sess = sage.Session()

# Upload the local training folder to S3 under the "data" key prefix.
WORK_DIRECTORY = 'local/'+model_name+'/input/data/training'
data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')
print(data_location)

# Hyperparameters from the same JSON file the local container run reads.
conf_file='local/'+model_name+'/input/config/hyperparameters.json'
with open(conf_file, 'r') as f:
    config = json.load(f)
print(config)

# Job names use hyphens in place of the underscores in the model name.
prefix=model_name
job_name=prefix.replace('_','-')

# URI of the training image in this account's and region's ECR registry.
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'

# `config` used to be loaded and printed but never handed to the job; it is
# now passed as the job's hyperparameters (an empty dict changes nothing).
classifier = sage.estimator.Estimator(
    image_uri=image,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path="s3://{}/output".format(sess.default_bucket()),
    sagemaker_session=sess,
    hyperparameters=config,
    base_job_name=job_name)
classifier.fit(data_location)

#### Download Model Artifact from Amazon S3 and copy it to Strategies Folder

In [None]:
#Get Model from S3
model_name_s3=classifier.model_data.replace('s3://'+sess.default_bucket()+'/','')
import boto3
s3 = boto3.resource('s3')
my_bucket = s3.Bucket(sess.default_bucket())
my_bucket.download_file(model_name_s3,'model.tar.gz')
!tar -xzf model.tar.gz
!rm model.tar.gz
!cp model.h5 ../2_Strategies/model/{model_name}.h5
!ls -la model.h5
!ls -la ../2_Strategies/model/model_*.h5