{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 1) Data Preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "algo_name='algo_sma'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s3bucket=!(aws cloudformation list-exports --query \"Exports[?Name=='algotrading-s3bucket'].Value\" --output text)\n",
    "s3bucket=s3bucket[0]\n",
    "s3bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pyathena import connect\n",
    "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/')\n",
    "\n",
    "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n",
    "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n",
    "del df['dt']\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "trainCount=int(len(df)*0.4)\n",
    "dfTrain = df.iloc[:trainCount]\n",
    "dfTest = df.iloc[trainCount:]\n",
    "\n",
    "dfTest.to_csv('/opt/ml/input/data/training/data.csv')\n",
    "dfTest.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "dfTest[\"close\"].plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 2) Modify Strategy Configuration \n",
    "\n",
    "In the following cell, you can adjust the parameters for the strategy.\n",
    "\n",
    "* `fast_period` = Fast Period for Moving Average Indicator in min (e.g. 8)\n",
    "* `slow_period` = Slow Period for Moving Average Indicator in min (e.g. 21)\n",
    "* `size` = The number of shares for a transaction (e.g. 100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%writefile /opt/ml/input/config/hyperparameters.json\n",
    "{ \"fast_period\" : \"8\",\n",
    "  \"slow_period\" : \"21\",\n",
    "  \"size\" : \"100\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run /opt/program/update_config.py $algo_name $s3bucket"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 3) Modify Strategy Code\n",
    "\n",
    "Here are some helpful links:\n",
    "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n",
    "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n",
    "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%writefile /opt/program/{algo_name}.py\n",
    "import backtrader as bt\n",
    "from algo_base import *\n",
    "import pytz\n",
    "from pytz import timezone\n",
    "\n",
    "class MyStrategy(StrategyTemplate):\n",
    "\n",
    "    def __init__(self):  # Initiation\n",
    "        super(MyStrategy, self).__init__()\n",
    "        self.config[\"fast_period\"]=int(self.config[\"fast_period\"])\n",
    "        self.config[\"slow_period\"]=int(self.config[\"slow_period\"])\n",
    "        self.config[\"size\"]=int(self.config[\"size\"])\n",
    "        print(self.config)\n",
    "        self.emaFast = bt.ind.ExponentialMovingAverage(period=self.config[\"fast_period\"])\n",
    "        self.emaSlow = bt.ind.ExponentialMovingAverage(period=self.config[\"slow_period\"])\n",
    "        self.size = self.config[\"size\"]\n",
    "\n",
    "    def init_broker(broker):\n",
    "        broker.setcash(100000.0)\n",
    "        broker.setcommission(commission=0.0) \n",
    "        \n",
    "    def add_data(cerebro):\n",
    "        data = btfeeds.GenericCSVData(\n",
    "            dataname=MyStrategy.TRAIN_FILE,\n",
    "            dtformat=('%Y-%m-%d'),\n",
    "            timeframe=bt.TimeFrame.Days,\n",
    "            datetime=0,\n",
    "            time=-1,\n",
    "            high=2,\n",
    "            low=3,\n",
    "            open=1,\n",
    "            close=4,\n",
    "            volume=5,\n",
    "            openinterest=-1\n",
    "        )\n",
    "        cerebro.adddata(data)\n",
    "\n",
    "    def next(self):  # Processing\n",
    "        super(MyStrategy, self).next()\n",
    "        dt=self.datas[0].datetime.datetime(0)\n",
    "        if not self.position:\n",
    "            if self.emaFast[0] > self.emaSlow[0]:\n",
    "                self.buy(size=self.size) # Go long\n",
    "            else:\n",
    "                self.sell(size=self.size) # Go short\n",
    "        elif self.position.size>0 and self.emaFast[0] < self.emaSlow[0]:\n",
    "            self.sell(size=2*self.size) # Go short\n",
    "        elif self.position.size<0 and self.emaFast[0] > self.emaSlow[0]:          \n",
    "            self.buy(size=2*self.size) # Go long"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 4) Backtest Locally"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%run /opt/program/train"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 5) Backtest Remotely with SageMaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp \"/opt/program/\" \"s3://{s3bucket}/{algo_name}/\" --recursive --exclude \"*\" --include \"{algo_name}*.*\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "conf_file='/opt/ml/input/config/hyperparameters.json'\n",
    "with open(conf_file, 'r') as f:\n",
    "    config = json.load(f)\n",
    "config['s3']=s3_bucket    \n",
    "config['chart']='true'\n",
    "print(config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#Run Remote Backtest via SageMaker\n",
    "import sagemaker as sage\n",
    "from sagemaker import get_execution_role\n",
    "from sagemaker.estimator import Estimator \n",
    "\n",
    "role = get_execution_role()\n",
    "sess = sage.Session()\n",
    "\n",
    "WORK_DIRECTORY = '/opt/ml/input/data/training'\n",
    "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n",
    "print(data_location)\n",
    "\n",
    "prefix=algo_name\n",
    "job_name=prefix.replace('_','-')\n",
    "\n",
    "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n",
    "region = sess.boto_session.region_name\n",
    "image = f'{account}.dkr.ecr.{region}.amazonaws.com/algotrading:1.0'\n",
    "\n",
    "algo = sage.estimator.Estimator(\n",
    "    image_uri=image,\n",
    "    role=role,\n",
    "    instance_count=1,\n",
    "    instance_type='ml.m4.xlarge',\n",
    "    output_path=\"s3://{}/output\".format(sess.default_bucket()),\n",
    "    sagemaker_session=sess,\n",
    "    base_job_name=job_name,\n",
    "    hyperparameters=config,\n",
    "    metric_definitions=[\n",
    "        {\n",
    "            \"Name\": \"algo:pnl\",\n",
    "            \"Regex\": \"Total PnL:(.*?)]\"\n",
    "        },\n",
    "        {\n",
    "            \"Name\": \"algo:sharpe_ratio\",\n",
    "            \"Regex\": \"Sharpe Ratio:(.*?),\"\n",
    "        }\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "algo.fit(data_location)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Get Algo Metrics\n",
    "from sagemaker.analytics import TrainingJobAnalytics\n",
    "\n",
    "latest_job_name = algo.latest_training_job.job_name\n",
    "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n",
    "metrics_dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Get Algo Chart from S3\n",
    "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n",
    "import boto3\n",
    "s3 = boto3.resource('s3')\n",
    "my_bucket = s3.Bucket(sess.default_bucket())\n",
    "my_bucket.download_file(model_name,'model.tar.gz')\n",
    "!tar -xzf model.tar.gz\n",
    "!rm model.tar.gz\n",
    "from IPython.display import Image\n",
    "Image(filename='chart.png') "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 6) Run Hyperparameter Optimization with SageMaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.tuner import (\n",
    "    IntegerParameter,\n",
    "    CategoricalParameter,\n",
    "    ContinuousParameter,\n",
    "    HyperparameterTuner,\n",
    ")\n",
    "\n",
    "hyperparameter_ranges = {\n",
    "    \"fast_period\": IntegerParameter(5, 10),\n",
    "    \"slow_period\": IntegerParameter(21, 31)\n",
    "}\n",
    "objective_metric_name= \"algo:pnl\"\n",
    "tuner = HyperparameterTuner(algo,\n",
    "    objective_metric_name,\n",
    "    hyperparameter_ranges,\n",
    "    max_jobs=6,\n",
    "    max_parallel_jobs=3,\n",
    "    metric_definitions=[\n",
    "        {\n",
    "            \"Name\": \"algo:pnl\",\n",
    "            \"Regex\": \"Total PnL:(.*?)]\"\n",
    "        }\n",
    "    ]\n",
    "   )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tuner.fit(data_location)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "best_params=boto3.client('sagemaker').describe_hyper_parameter_tuning_job(\n",
    "HyperParameterTuningJobName=tuner.latest_tuning_job.job_name)['BestTrainingJob']['TunedHyperParameters']\n",
    "best_params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.analytics import TrainingJobAnalytics\n",
    "bestjob=tuner.best_training_job()\n",
    "metrics_dataframe = TrainingJobAnalytics(training_job_name=bestjob).dataframe()\n",
    "metrics_dataframe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 7) Backtest Locally with Optimal Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use optimal hyperparameter and test data\n",
    "conf_file='/opt/ml/input/config/hyperparameters.json'\n",
    "with open(conf_file, 'r') as f:\n",
    "    config = json.load(f)\n",
    "config['fast_period']=best_params['fast_period']\n",
    "config['slow_period']=best_params['slow_period']\n",
    "config['chart']='false'\n",
    "print(config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run /opt/program/train"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Congratulations! You've completed this strategy."
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}