{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7daa0a33",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker.pytorch import PyTorch\n",
    "import boto3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55b9f811",
   "metadata": {},
   "outputs": [],
   "source": [
    "s3_client = boto3.client(\"s3\")\n",
    "sess = sagemaker.session.Session()\n",
    "role = sagemaker.get_execution_role()\n",
    "bucket = sess.default_bucket()\n",
    "key_prefix = \"pt_lightning_ddp_tune\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d740c86d",
   "metadata": {},
   "outputs": [],
   "source": [
    "!wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz && tar xzf cifar-10-python.tar.gz && rm cifar-10-python.tar.gz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2469eabb",
   "metadata": {},
   "outputs": [],
   "source": [
    "cifar_data_path = sess.upload_data(\"cifar-10-batches-py\", bucket, key_prefix=f\"{key_prefix}/input_data/cifar-10-batches-py\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5c448a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "cifar_path = \"/\".join(cifar_data_path.split(\"/\")[:-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3030f827",
   "metadata": {},
   "outputs": [],
   "source": [
    "# optionally set subnets and security_groups\n",
    "subnets=None\n",
    "security_group_ids=None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "392c20ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "estimator_gpu_tune_cifar = PyTorch(\n",
    "    source_dir = \"src\",\n",
    "    entry_point=\"tune_cifar.py\",\n",
    "    subnets=subnets,\n",
    "    security_group_ids=security_group_ids,\n",
    "    role=role,\n",
    "    instance_count=2, \n",
    "    instance_type=\"ml.g4dn.xlarge\", # instance with 1 GPUs. use g4dn.12xlarge or g5.12xlarge for multi-gpu instances\n",
    "    framework_version=\"1.10\",\n",
    "    py_version=\"py38\",\n",
    "    hyperparameters={\"use-gpu\":True, # use GPU for training\n",
    "                     \"num-samples\":4, # number of trials to run for HPO\n",
    "                     \"num-workers\":2, # number of GPUs to use for each training run with Data Parallel distributed training\n",
    "                     \"num-epochs\":5} # number of epochs to train each model on\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5d70f6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "estimator_gpu_tune_cifar.fit({\"train\": cifar_path})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b93c9d0-d718-4c5e-9d73-7a5bd6bbc15a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}