{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get the Personalize boto3 Client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "\n",
    "import json\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import time\n",
    "\n",
    "personalize = boto3.client('personalize')\n",
    "personalize_runtime = boto3.client('personalize-runtime')\n",
    "iam = boto3.client(\"iam\")\n",
    "s3 = boto3.client(\"s3\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Specify a Bucket and Data Output Location"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "bucket = \"personalize-demo\"       # replace with the name of your S3 bucket\n",
    "filename = \"movie-lens-100k.csv\"  # replace with a name that you want to save the dataset under"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Download, Prepare, and Upload Training Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Download and Explore the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2020-05-05 09:38:29--  http://files.grouplens.org/datasets/movielens/ml-100k.zip\n",
      "Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152\n",
      "Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.\n",
      "HTTP request sent, awaiting response... 304 Not Modified\n",
      "File ‘ml-100k.zip’ not modified on server. Omitting download.\n",
      "\n",
      "Archive:  ml-100k.zip\n",
      "  inflating: ml-100k/allbut.pl       \n",
      "  inflating: ml-100k/mku.sh          \n",
      "  inflating: ml-100k/README          \n",
      "  inflating: ml-100k/u.data          \n",
      "  inflating: ml-100k/u.genre         \n",
      "  inflating: ml-100k/u.info          \n",
      "  inflating: ml-100k/u.item          \n",
      "  inflating: ml-100k/u.occupation    \n",
      "  inflating: ml-100k/u.user          \n",
      "  inflating: ml-100k/u1.base         \n",
      "  inflating: ml-100k/u1.test         \n",
      "  inflating: ml-100k/u2.base         \n",
      "  inflating: ml-100k/u2.test         \n",
      "  inflating: ml-100k/u3.base         \n",
      "  inflating: ml-100k/u3.test         \n",
      "  inflating: ml-100k/u4.base         \n",
      "  inflating: ml-100k/u4.test         \n",
      "  inflating: ml-100k/u5.base         \n",
      "  inflating: ml-100k/u5.test         \n",
      "  inflating: ml-100k/ua.base         \n",
      "  inflating: ml-100k/ua.test         \n",
      "  inflating: ml-100k/ub.base         \n",
      "  inflating: ml-100k/ub.test         \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>USER_ID</th>\n",
       "      <th>ITEM_ID</th>\n",
       "      <th>RATING</th>\n",
       "      <th>TIMESTAMP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>196</td>\n",
       "      <td>242</td>\n",
       "      <td>3</td>\n",
       "      <td>881250949</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>186</td>\n",
       "      <td>302</td>\n",
       "      <td>3</td>\n",
       "      <td>891717742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99998</th>\n",
       "      <td>13</td>\n",
       "      <td>225</td>\n",
       "      <td>2</td>\n",
       "      <td>882399156</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99999</th>\n",
       "      <td>12</td>\n",
       "      <td>203</td>\n",
       "      <td>3</td>\n",
       "      <td>879959583</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       USER_ID  ITEM_ID  RATING  TIMESTAMP\n",
       "0          196      242       3  881250949\n",
       "1          186      302       3  891717742\n",
       "...        ...      ...     ...        ...\n",
       "99998       13      225       2  882399156\n",
       "99999       12      203       3  879959583\n",
       "\n",
       "[100000 rows x 4 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "!wget -N http://files.grouplens.org/datasets/movielens/ml-100k.zip\n",
    "!unzip -o ml-100k.zip\n",
    "data = pd.read_csv('./ml-100k/u.data', sep='\\t', names=['USER_ID', 'ITEM_ID', 'RATING', 'TIMESTAMP'])\n",
    "pd.set_option('display.max_rows', 5)\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Optional security practice: Protect data at rest - Encrypt/decrypt your dataset\n",
    "We are using a pre-made dataset that hasn't been encrypted so there is no need to decrypt this dataset. However, it would be a good security practice to store your datasets encrypted.\n",
    "\n",
    "For more information on encrypting your data when using S3, visit https://docs.aws.amazon.com/AmazonS3/latest/dev/KMSUsingRESTAPI.html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Optional security practice: Protect data in transit - SSL access only for S3 bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ResponseMetadata': {'RequestId': 'FA2BA6A24A738415',\n",
       "  'HostId': 'PWU2HsPCLALBFzYHzVEUK5EuODQkMTrj2l9IuCs3x+GBDlM24tg2k4BF1fMlnmzCAwy/KRVplQw=',\n",
       "  'HTTPStatusCode': 204,\n",
       "  'HTTPHeaders': {'x-amz-id-2': 'PWU2HsPCLALBFzYHzVEUK5EuODQkMTrj2l9IuCs3x+GBDlM24tg2k4BF1fMlnmzCAwy/KRVplQw=',\n",
       "   'x-amz-request-id': 'FA2BA6A24A738415',\n",
       "   'date': 'Tue, 05 May 2020 16:38:34 GMT',\n",
       "   'server': 'AmazonS3'},\n",
       "  'RetryAttempts': 0}}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "requires_ssl_access_policy = {\n",
    "    \"Version\": \"2012-10-17\",\n",
    "    \"Id\": \"RequireSSLAccess\",\n",
    "    \"Statement\": [\n",
    "        {\n",
    "            \"Sid\": \"RequireSSLAccess\",\n",
    "            \"Effect\": \"Deny\",\n",
    "            \"Principal\": \"*\",\n",
    "            \"Action\": \"*\",\n",
    "            \"Resource\": [\n",
    "                \"arn:aws:s3:::{}\".format(bucket),\n",
    "                \"arn:aws:s3:::{}/*\".format(bucket)\n",
    "            ],\n",
    "            \"Condition\": {\n",
    "                \"Bool\": {\n",
    "                    \"aws:SecureTransport\": \"false\"\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\n",
    "s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(requires_ssl_access_policy))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Additional security note:\n",
    "Some users prevent accidental information disclosure by limiting S3 access to only come from a VPC. Another common security practice is to validate this limited access. It should be noted that this security check will fail when performed against S3 buckets used for Personalize - as Personalize copies data from the user's S3 into the internal systems used by Personalize (during dataset import jobs)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Optional security practice: validate bucket owner matches your account canonical id\n",
    "[More information about canonical ids here](https://docs.aws.amazon.com/general/latest/gr/acct-identifiers.html#FindingCanonicalId)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This bucket belongs to: 28398dc6b1acac01a4a73b246b5f9c9a688f50b9ce70240f74c0f90ebf5e2c61 \n"
     ]
    }
   ],
   "source": [
    "bucket_owner_id = boto3.client('s3').get_bucket_acl(Bucket=bucket)['Owner']['ID']\n",
    "print(\"This bucket belongs to: {} \".format(bucket_owner_id))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Optional security practice: Protect data integrity - Enable S3 bucket versioning\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ResponseMetadata': {'RequestId': '4C3AA32EF96CA240',\n",
       "  'HostId': 'HQaKmMvUTbXTRsxLL/9BubrfD09xisEO8x72cpQP0syhAH9dXKRx7gqmzchq3TbERdwprUKcRmQ=',\n",
       "  'HTTPStatusCode': 200,\n",
       "  'HTTPHeaders': {'x-amz-id-2': 'HQaKmMvUTbXTRsxLL/9BubrfD09xisEO8x72cpQP0syhAH9dXKRx7gqmzchq3TbERdwprUKcRmQ=',\n",
       "   'x-amz-request-id': '4C3AA32EF96CA240',\n",
       "   'date': 'Mon, 04 May 2020 20:46:23 GMT',\n",
       "   'content-length': '0',\n",
       "   'server': 'AmazonS3'},\n",
       "  'RetryAttempts': 0}}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s3_resource = boto3.resource('s3')\n",
    "bucket_versioning = s3_resource.BucketVersioning(bucket)\n",
    "bucket_versioning.enable()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Prepare and Upload Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data[data['RATING'] > 3.6]                # keep only movies rated 3.6 and above\n",
    "data = data[['USER_ID', 'ITEM_ID', 'TIMESTAMP']] # select columns that match the columns in the schema below\n",
    "data.to_csv(filename, index=False)\n",
    "\n",
    "boto3.Session().resource('s3').Bucket(bucket).Object(filename).upload_file(filename)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create Schema"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"schemaArn\": \"arn:aws:personalize:us-west-2:237539672711:schema/DEMO-schema\", \n",
      "  \"ResponseMetadata\": {\n",
      "    \"RetryAttempts\": 0, \n",
      "    \"HTTPStatusCode\": 200, \n",
      "    \"RequestId\": \"12eb7cba-2b64-4be9-9f6e-eeebff7629a5\", \n",
      "    \"HTTPHeaders\": {\n",
      "      \"date\": \"Tue, 04 Dec 2018 05:49:04 GMT\", \n",
      "      \"x-amzn-requestid\": \"12eb7cba-2b64-4be9-9f6e-eeebff7629a5\", \n",
      "      \"content-length\": \"79\", \n",
      "      \"content-type\": \"application/x-amz-json-1.1\", \n",
      "      \"connection\": \"keep-alive\"\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "schema = {\n",
    "    \"type\": \"record\",\n",
    "    \"name\": \"Interactions\",\n",
    "    \"namespace\": \"com.amazonaws.personalize.schema\",\n",
    "    \"fields\": [\n",
    "        {\n",
    "            \"name\": \"USER_ID\",\n",
    "            \"type\": \"string\"\n",
    "        },\n",
    "        {\n",
    "            \"name\": \"ITEM_ID\",\n",
    "            \"type\": \"string\"\n",
    "        },\n",
    "        {\n",
    "            \"name\": \"TIMESTAMP\",\n",
    "            \"type\": \"long\"\n",
    "        }\n",
    "    ],\n",
    "    \"version\": \"1.0\"\n",
    "}\n",
    "\n",
    "create_schema_response = personalize.create_schema(\n",
    "    name = \"DEMO-schema\",\n",
    "    schema = json.dumps(schema)\n",
    ")\n",
    "\n",
    "schema_arn = create_schema_response['schemaArn']\n",
    "print(json.dumps(create_schema_response, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Optional security practice - Protect data at rest - Encrypt datasets under Personalize\n",
    "If you skip this step, do not pass kmsKeyArn or roleArn when you create your dataset group."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "arn:aws:kms:us-west-2:001513653716:key/f5be82af-a160-4c49-813e-e5448fa95693\n",
      "arn:aws:iam::001513653716:role/AccessPersonalizeDatasetRole\n"
     ]
    }
   ],
   "source": [
    "kmsKeyArn = boto3.client('kms').create_key(Description=\"personalize-data\")['KeyMetadata']['Arn']\n",
    "print(kmsKeyArn)\n",
    "key_accessor_policy_name = \"AccessPersonalizeDatasetPolicy\"\n",
    "key_accessor_policy = {\n",
    "  \"Version\": \"2012-10-17\",\n",
    "  \"Statement\": {\n",
    "    \"Effect\": \"Allow\",\n",
    "    \"Action\": [\n",
    "      \"kms:*\"\n",
    "    ],\n",
    "    \"Resource\": [ kmsKeyArn ]\n",
    "  }\n",
    "}\n",
    "key_access_policy = iam.create_policy(\n",
    "    PolicyName = key_accessor_policy_name,\n",
    "    PolicyDocument = json.dumps(key_accessor_policy)\n",
    ")\n",
    "\n",
    "key_access_role_name = \"AccessPersonalizeDatasetRole\"\n",
    "assume_role_policy_document = {\n",
    "    \"Version\": \"2012-10-17\",\n",
    "    \"Statement\": [\n",
    "        {\n",
    "          \"Effect\": \"Allow\",\n",
    "          \"Principal\": {\n",
    "            \"Service\": \"personalize.amazonaws.com\"\n",
    "          },\n",
    "          \"Action\": \"sts:AssumeRole\"\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\n",
    "create_role_response = iam.create_role(\n",
    "    RoleName = key_access_role_name,\n",
    "    AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)\n",
    ")\n",
    "iam.attach_role_policy(\n",
    "    RoleName = create_role_response[\"Role\"][\"RoleName\"],\n",
    "    PolicyArn = key_access_policy[\"Policy\"][\"Arn\"]\n",
    ")\n",
    "\n",
    "keyAccessRoleArn = create_role_response[\"Role\"][\"Arn\"]\n",
    "print(keyAccessRoleArn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create and Wait for Encrypted Dataset Group"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Encrypted Dataset Group\n",
    "If you did not create a KMS Key and IAM role from the last step, then do not pass in roleArn or kmsKeyArn."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "ClientError",
     "evalue": "An error occurred (AccessDeniedException) when calling the CreateDatasetGroup operation: Cross-account pass role is not allowed.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mClientError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-5-d867662faf2d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"DEMO-dataset-group-aadata\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m     \u001b[0mroleArn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"arn:aws:iam::261294318658:role/AccessPersonalizeDatasetRole\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m     \u001b[0mkmsKeyArn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"arn:aws:kms:us-west-2:261294318658:key/be6db2bb-1fe8-43a5-8134-0762740a22b9\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      5\u001b[0m )\n\u001b[1;32m      6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/boto3/src/botocore/botocore/client.py\u001b[0m in \u001b[0;36m_api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    314\u001b[0m                     \"%s() only accepts keyword arguments.\" % py_operation_name)\n\u001b[1;32m    315\u001b[0m             \u001b[0;31m# The \"self\" in this scope is referring to the BaseClient.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 316\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_api_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moperation_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    317\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    318\u001b[0m         \u001b[0m_api_call\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpy_operation_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/boto3/src/botocore/botocore/client.py\u001b[0m in \u001b[0;36m_make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m    633\u001b[0m             \u001b[0merror_code\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsed_response\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Error\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Code\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    634\u001b[0m             \u001b[0merror_class\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexceptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 635\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0merror_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_response\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moperation_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    636\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    637\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mparsed_response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mClientError\u001b[0m: An error occurred (AccessDeniedException) when calling the CreateDatasetGroup operation: Cross-account pass role is not allowed."
     ]
    }
   ],
   "source": [
    "create_dataset_group_response = personalize.create_dataset_group(\n",
    "    name = \"DEMO-dataset-group\",\n",
    "    roleArn = keyAccessRoleArn,\n",
    "    kmsKeyArn = kmsKeyArn\n",
    ")\n",
    "\n",
    "dataset_group_arn = create_dataset_group_response['datasetGroupArn']\n",
    "print(json.dumps(create_dataset_group_response, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Wait for Dataset Group to Have ACTIVE Status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatasetGroup: CREATE PENDING\n",
      "DatasetGroup: CREATE FAILED\n"
     ]
    }
   ],
   "source": [
    "max_time = time.time() + 3*60*60 # 3 hours\n",
    "while time.time() < max_time:\n",
    "    describe_dataset_group_response = personalize.describe_dataset_group(\n",
    "        datasetGroupArn = dataset_group_arn\n",
    "    )\n",
    "    status = describe_dataset_group_response[\"datasetGroup\"][\"status\"]\n",
    "    print(\"DatasetGroup: {}\".format(status))\n",
    "    \n",
    "    if status == \"ACTIVE\" or status == \"CREATE FAILED\":\n",
    "        break\n",
    "        \n",
    "    time.sleep(60)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"ResponseMetadata\": {\n",
      "    \"RetryAttempts\": 0, \n",
      "    \"HTTPStatusCode\": 200, \n",
      "    \"RequestId\": \"29ab75c8-df6e-4807-943f-1b48014181d1\", \n",
      "    \"HTTPHeaders\": {\n",
      "      \"date\": \"Tue, 04 Dec 2018 05:50:19 GMT\", \n",
      "      \"x-amzn-requestid\": \"29ab75c8-df6e-4807-943f-1b48014181d1\", \n",
      "      \"content-length\": \"101\", \n",
      "      \"content-type\": \"application/x-amz-json-1.1\", \n",
      "      \"connection\": \"keep-alive\"\n",
      "    }\n",
      "  }, \n",
      "  \"datasetArn\": \"arn:aws:personalize:us-west-2:237539672711:dataset/DEMO-dataset-group/INTERACTIONS\"\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "dataset_type = \"INTERACTIONS\"\n",
    "create_dataset_response = personalize.create_dataset(\n",
    "    name = \"DEMO-dataset\",\n",
    "    datasetType = dataset_type,\n",
    "    datasetGroupArn = dataset_group_arn,\n",
    "    schemaArn = schema_arn\n",
    ")\n",
    "\n",
    "dataset_arn = create_dataset_response['datasetArn']\n",
    "print(json.dumps(create_dataset_response, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare, Create, and Wait for Dataset Import Job"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Attach Policy to S3 Bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "policy = {\n",
    "    \"Version\": \"2012-10-17\",\n",
    "    \"Id\": \"PersonalizeS3BucketAccessPolicy\",\n",
    "    \"Statement\": [\n",
    "        {\n",
    "            \"Sid\": \"PersonalizeS3BucketAccessPolicy\",\n",
    "            \"Effect\": \"Allow\",\n",
    "            \"Principal\": {\n",
    "                \"Service\": \"personalize.amazonaws.com\"\n",
    "            },\n",
    "            \"Action\": [\n",
    "                \"s3:GetObject\",\n",
    "                \"s3:ListBucket\"\n",
    "            ],\n",
    "            \"Resource\": [\n",
    "                \"arn:aws:s3:::{}\".format(bucket),\n",
    "                \"arn:aws:s3:::{}/*\".format(bucket)\n",
    "            ]\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\n",
    "s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Personalize Role"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "arn:aws:iam::660166145966:role/PersonalizeRole2\n"
     ]
    }
   ],
   "source": [
    "role_name = \"PersonalizeRole\"\n",
    "assume_role_policy_document = {\n",
    "    \"Version\": \"2012-10-17\",\n",
    "    \"Statement\": [\n",
    "        {\n",
    "          \"Effect\": \"Allow\",\n",
    "          \"Principal\": {\n",
    "            \"Service\": \"personalize.amazonaws.com\"\n",
    "          },\n",
    "          \"Action\": \"sts:AssumeRole\"\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\n",
    "create_role_response = iam.create_role(\n",
    "    RoleName = role_name,\n",
    "    AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)\n",
    ")\n",
    "\n",
    "# AmazonPersonalizeFullAccess provides access to any S3 bucket with a name that includes \"personalize\" or \"Personalize\" \n",
    "# if you would like to use a bucket with a different name, please consider creating and attaching a new policy\n",
    "# that provides read access to your bucket or attaching the AmazonS3ReadOnlyAccess policy to the role\n",
    "policy_arn = \"arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess\"\n",
    "iam.attach_role_policy(\n",
    "    RoleName = role_name,\n",
    "    PolicyArn = policy_arn\n",
    ")\n",
    "\n",
    "time.sleep(60) # wait for a minute to allow IAM role policy attachment to propagate\n",
    "\n",
    "role_arn = create_role_response[\"Role\"][\"Arn\"]\n",
    "print(role_arn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Dataset Import Job"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"datasetImportJobArn\": \"arn:aws:personalize:us-west-2:237539672711:dataset-import-job/DEMO-dataset-import-job\", \n",
      "  \"ResponseMetadata\": {\n",
      "    \"RetryAttempts\": 0, \n",
      "    \"HTTPStatusCode\": 200, \n",
      "    \"RequestId\": \"3c77fe8d-d9fe-4ca5-ad03-b18e937acbb3\", \n",
      "    \"HTTPHeaders\": {\n",
      "      \"date\": \"Tue, 04 Dec 2018 05:50:55 GMT\", \n",
      "      \"x-amzn-requestid\": \"3c77fe8d-d9fe-4ca5-ad03-b18e937acbb3\", \n",
      "      \"content-length\": \"113\", \n",
      "      \"content-type\": \"application/x-amz-json-1.1\", \n",
      "      \"connection\": \"keep-alive\"\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "create_dataset_import_job_response = personalize.create_dataset_import_job(\n",
    "    jobName = \"DEMO-dataset-import-job\",\n",
    "    datasetArn = dataset_arn,\n",
    "    dataSource = {\n",
    "        \"dataLocation\": \"s3://{}/{}\".format(bucket, filename)\n",
    "    },\n",
    "    roleArn = role_arn\n",
    ")\n",
    "\n",
    "dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']\n",
    "print(json.dumps(create_dataset_import_job_response, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Wait for Dataset Import Job to Have ACTIVE Status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatasetImportJob: CREATE PENDING\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: CREATE IN_PROGRESS\n",
      "DatasetImportJob: ACTIVE\n"
     ]
    }
   ],
   "source": [
    "max_time = time.time() + 3*60*60 # 3 hours\n",
    "while time.time() < max_time:\n",
    "    describe_dataset_import_job_response = personalize.describe_dataset_import_job(\n",
    "        datasetImportJobArn = dataset_import_job_arn\n",
    "    )\n",
    "    status = describe_dataset_import_job_response[\"datasetImportJob\"]['status']\n",
    "    print(\"DatasetImportJob: {}\".format(status))\n",
    "    \n",
    "    if status == \"ACTIVE\" or status == \"CREATE FAILED\":\n",
    "        break\n",
    "        \n",
    "    time.sleep(60)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Select Recipe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ResponseMetadata': {'HTTPHeaders': {'connection': 'keep-alive',\n",
       "   'content-length': '1287',\n",
       "   'content-type': 'application/x-amz-json-1.1',\n",
       "   'date': 'Fri, 22 Mar 2019 19:28:37 GMT',\n",
       "   'x-amzn-requestid': '275695de-45b2-4077-82d4-c961ceeaf367'},\n",
       "  'HTTPStatusCode': 200,\n",
       "  'RequestId': '275695de-45b2-4077-82d4-c961ceeaf367',\n",
       "  'RetryAttempts': 0},\n",
       " u'recipes': [{u'creationDateTime': datetime.datetime(2018, 11, 25, 16, 0, tzinfo=tzlocal()),\n",
       "   u'lastUpdatedDateTime': datetime.datetime(1969, 12, 31, 16, 0, tzinfo=tzlocal()),\n",
       "   u'name': u'aws-hrnn',\n",
       "   u'recipeArn': u'arn:aws:personalize:::recipe/aws-hrnn',\n",
       "   u'status': u'ACTIVE'},\n",
       "  {u'creationDateTime': datetime.datetime(2018, 11, 25, 16, 0, tzinfo=tzlocal()),\n",
       "   u'lastUpdatedDateTime': datetime.datetime(1969, 12, 31, 16, 0, tzinfo=tzlocal()),\n",
       "   u'name': u'aws-hrnn-coldstart',\n",
       "   u'recipeArn': u'arn:aws:personalize:::recipe/aws-hrnn-coldstart',\n",
       "   u'status': u'ACTIVE'},\n",
       "  {u'creationDateTime': datetime.datetime(2018, 11, 25, 16, 0, tzinfo=tzlocal()),\n",
       "   u'lastUpdatedDateTime': datetime.datetime(1969, 12, 31, 16, 0, tzinfo=tzlocal()),\n",
       "   u'name': u'aws-hrnn-metadata',\n",
       "   u'recipeArn': u'arn:aws:personalize:::recipe/aws-hrnn-metadata',\n",
       "   u'status': u'ACTIVE'},\n",
       "  {u'creationDateTime': datetime.datetime(2018, 11, 25, 16, 0, tzinfo=tzlocal()),\n",
       "   u'lastUpdatedDateTime': datetime.datetime(1969, 12, 31, 16, 0, tzinfo=tzlocal()),\n",
       "   u'name': u'aws-personalized-ranking',\n",
       "   u'recipeArn': u'arn:aws:personalize:::recipe/aws-personalized-ranking',\n",
       "   u'status': u'ACTIVE'},\n",
       "  {u'creationDateTime': datetime.datetime(2018, 11, 25, 16, 0, tzinfo=tzlocal()),\n",
       "   u'lastUpdatedDateTime': datetime.datetime(1969, 12, 31, 16, 0, tzinfo=tzlocal()),\n",
       "   u'name': u'aws-popularity-count',\n",
       "   u'recipeArn': u'arn:aws:personalize:::recipe/aws-popularity-count',\n",
       "   u'status': u'ACTIVE'},\n",
       "  {u'creationDateTime': datetime.datetime(2018, 11, 25, 16, 0, tzinfo=tzlocal()),\n",
       "   u'lastUpdatedDateTime': datetime.datetime(1969, 12, 31, 16, 0, tzinfo=tzlocal()),\n",
       "   u'name': u'aws-sims',\n",
       "   u'recipeArn': u'arn:aws:personalize:::recipe/aws-sims',\n",
       "   u'status': u'ACTIVE'}]}"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_recipes_response = personalize.list_recipes()\n",
    "recipe_arn = \"arn:aws:personalize:::recipe/aws-hrnn\" # aws-hrnn selected for demo purposes\n",
    "list_recipes_response"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create and Wait for Solution"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"solutionArn\": \"arn:aws:personalize:us-west-2:237539672711:solution/DEMO-solution\", \n",
      "  \"ResponseMetadata\": {\n",
      "    \"RetryAttempts\": 0, \n",
      "    \"HTTPStatusCode\": 200, \n",
      "    \"RequestId\": \"2042832f-0775-43e2-86de-53a061be1f63\", \n",
      "    \"HTTPHeaders\": {\n",
      "      \"date\": \"Mon, 03 Dec 2018 23:55:17 GMT\", \n",
      "      \"x-amzn-requestid\": \"2042832f-0775-43e2-86de-53a061be1f63\", \n",
      "      \"content-length\": \"83\", \n",
      "      \"content-type\": \"application/x-amz-json-1.1\", \n",
      "      \"connection\": \"keep-alive\"\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "create_solution_response = personalize.create_solution(\n",
    "    name = \"DEMO-solution\",\n",
    "    datasetGroupArn = dataset_group_arn,\n",
    "    recipeArn = recipe_arn\n",
    ")\n",
    "\n",
    "solution_arn = create_solution_response['solutionArn']\n",
    "print(json.dumps(create_solution_response, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Solution Version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"solutionVersionArn\": \"arn:aws:personalize:us-west-2:237539672711:solution/DEMO-solution/702e0792\", \n",
      "  \"ResponseMetadata\": {\n",
      "    \"RetryAttempts\": 0, \n",
      "    \"HTTPStatusCode\": 200, \n",
      "    \"RequestId\": \"2042832f-0775-43e2-86de-53a061be1f65\", \n",
      "    \"HTTPHeaders\": {\n",
      "      \"date\": \"Mon, 03 Dec 2018 23:55:17 GMT\", \n",
      "      \"x-amzn-requestid\": \"2042832f-0775-43e2-86de-53a061be1f65\", \n",
      "      \"content-length\": \"90\", \n",
      "      \"content-type\": \"application/x-amz-json-1.1\", \n",
      "      \"connection\": \"keep-alive\"\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "create_solution_version_response = personalize.create_solution_version(\n",
    "    solutionArn = solution_arn\n",
    ")\n",
    "\n",
    "solution_version_arn = create_solution_version_response['solutionVersionArn']\n",
    "print(json.dumps(create_solution_version_response, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Wait for Solution Version to Have ACTIVE Status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SolutionVersion: CREATE PENDING\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: CREATE IN_PROGRESS\n",
      "SolutionVersion: ACTIVE\n"
     ]
    }
   ],
   "source": [
    "max_time = time.time() + 3*60*60 # 3 hours\n",
    "while time.time() < max_time:\n",
    "    describe_solution_version_response = personalize.describe_solution_version(\n",
    "        solutionVersionArn = solution_version_arn\n",
    "    )\n",
    "    status = describe_solution_version_response[\"solutionVersion\"][\"status\"]\n",
    "    print(\"SolutionVersion: {}\".format(status))\n",
    "    \n",
    "    if status == \"ACTIVE\" or status == \"CREATE FAILED\":\n",
    "        break\n",
    "        \n",
    "    time.sleep(60)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get Metrics of Solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"metrics\": {\n",
      "    \"coverage\": 0.2603, \n",
      "    \"mean_reciprocal_rank_at_25\": 0.0539, \n",
      "    \"normalized_discounted_cumulative_gain_at_5\": 0.0486, \n",
      "    \"normalized_discounted_cumulative_gain_at_10\": 0.0649, \n",
      "    \"normalized_discounted_cumulative_gain_at_25\": 0.0918, \n",
      "    \"precision_at_5\": 0.0109, \n",
      "    \"precision_at_10\": 0.0098, \n",
      "    \"precision_at_25\": 0.0083, \n",
      "  }, \n",
      "  \"solutionVersionArn\": \"arn:aws:personalize:us-west-2:237539672711:solution/DEMO-solution/702e0792\", \n",
      "  \"ResponseMetadata\": {\n",
      "    \"RetryAttempts\": 0, \n",
      "    \"HTTPStatusCode\": 200, \n",
      "    \"RequestId\": \"5b5f4f4f-5249-4c0e-9f83-45e3fe22f09f\", \n",
      "    \"HTTPHeaders\": {\n",
      "      \"date\": \"Tue, 04 Dec 2018 00:53:54 GMT\", \n",
      "      \"x-amzn-requestid\": \"5b5f4f4f-5249-4c0e-9f83-45e3fe22f09f\", \n",
      "      \"content-length\": \"724\", \n",
      "      \"content-type\": \"application/x-amz-json-1.1\", \n",
      "      \"connection\": \"keep-alive\"\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "get_solution_metrics_response = personalize.get_solution_metrics(\n",
    "    solutionVersionArn = solution_version_arn\n",
    ")\n",
    "\n",
    "print(json.dumps(get_solution_metrics_response, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create and Wait for Campaign"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Campaign"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"campaignArn\": \"arn:aws:personalize:us-west-2:237539672711:campaign/DEMO-campaign\", \n",
      "  \"ResponseMetadata\": {\n",
      "    \"RetryAttempts\": 0, \n",
      "    \"HTTPStatusCode\": 200, \n",
      "    \"RequestId\": \"527e97ba-683c-4dc7-8218-00716f22c904\", \n",
      "    \"HTTPHeaders\": {\n",
      "      \"date\": \"Tue, 04 Dec 2018 00:54:17 GMT\", \n",
      "      \"x-amzn-requestid\": \"527e97ba-683c-4dc7-8218-00716f22c904\", \n",
      "      \"content-length\": \"83\", \n",
      "      \"content-type\": \"application/x-amz-json-1.1\", \n",
      "      \"connection\": \"keep-alive\"\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "create_campaign_response = personalize.create_campaign(\n",
    "    name = \"DEMO-campaign\",\n",
    "    solutionVersionArn = solution_version_arn,\n",
    "    minProvisionedTPS = 1\n",
    ")\n",
    "\n",
    "campaign_arn = create_campaign_response['campaignArn']\n",
    "print(json.dumps(create_campaign_response, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Wait for Campaign to Have ACTIVE Status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Campaign: CREATE PENDING\n",
      "Campaign: CREATE IN_PROGRESS\n",
      "Campaign: CREATE IN_PROGRESS\n",
      "Campaign: CREATE IN_PROGRESS\n",
      "Campaign: CREATE IN_PROGRESS\n",
      "Campaign: CREATE IN_PROGRESS\n",
      "Campaign: CREATE IN_PROGRESS\n",
      "Campaign: CREATE IN_PROGRESS\n",
      "Campaign: CREATE IN_PROGRESS\n",
      "Campaign: ACTIVE\n"
     ]
    }
   ],
   "source": [
    "max_time = time.time() + 3*60*60 # 3 hours\n",
    "while time.time() < max_time:\n",
    "    describe_campaign_response = personalize.describe_campaign(\n",
    "        campaignArn = campaign_arn\n",
    "    )\n",
    "    status = describe_campaign_response[\"campaign\"][\"status\"]\n",
    "    print(\"Campaign: {}\".format(status))\n",
    "    \n",
    "    if status == \"ACTIVE\" or status == \"CREATE FAILED\":\n",
    "        break\n",
    "        \n",
    "    time.sleep(60)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get Recommendations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Select a User and an Item"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "USER: 711\n",
      "ITEM: Silence of the Lambs, The (1991)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ITEM_ID</th>\n",
       "      <th>TITLE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>GoldenEye (1995)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1680</th>\n",
       "      <td>1681</td>\n",
       "      <td>You So Crazy (1994)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1681</th>\n",
       "      <td>1682</td>\n",
       "      <td>Scream of Stone (Schrei aus Stein) (1991)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1682 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      ITEM_ID                                      TITLE\n",
       "0           1                           Toy Story (1995)\n",
       "1           2                           GoldenEye (1995)\n",
       "...       ...                                        ...\n",
       "1680     1681                        You So Crazy (1994)\n",
       "1681     1682  Scream of Stone (Schrei aus Stein) (1991)\n",
       "\n",
       "[1682 rows x 2 columns]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "items = pd.read_csv('./ml-100k/u.item', sep='|', usecols=[0,1], encoding='latin-1')\n",
    "items.columns = ['ITEM_ID', 'TITLE']\n",
    "\n",
    "user_id, item_id, _ = data.sample().values[0]\n",
    "item_title = items.loc[items['ITEM_ID'] == item_id].values[0][-1]\n",
    "print(\"USER: {}\".format(user_id))\n",
    "print(\"ITEM: {}\".format(item_title))\n",
    "\n",
    "items"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Call GetRecommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recommendations: [\n",
      "  \"Godfather, The (1972)\", \n",
      "  \"Contact (1997)\", \n",
      "  \"Titanic (1997)\", \n",
      "  \"Star Wars (1977)\", \n",
      "  \"Fargo (1996)\", \n",
      "  \"Liar Liar (1997)\", \n",
      "  \"Evita (1996)\", \n",
      "  \"Jerry Maguire (1996)\", \n",
      "  \"Scream (1996)\", \n",
      "  \"Devil's Advocate, The (1997)\", \n",
      "  \"Full Monty, The (1997)\", \n",
      "  \"Conspiracy Theory (1997)\", \n",
      "  \"Edge, The (1997)\", \n",
      "  \"Sense and Sensibility (1995)\", \n",
      "  \"English Patient, The (1996)\", \n",
      "  \"Twelve Monkeys (1995)\", \n",
      "  \"L.A. Confidential (1997)\", \n",
      "  \"As Good As It Gets (1997)\", \n",
      "  \"In & Out (1997)\", \n",
      "  \"Rock, The (1996)\", \n",
      "  \"Return of the Jedi (1983)\", \n",
      "  \"Amistad (1997)\", \n",
      "  \"Men in Black (1997)\", \n",
      "  \"Truth About Cats & Dogs, The (1996)\", \n",
      "  \"Alien: Resurrection (1997)\"\n",
      "]\n"
     ]
    }
   ],
   "source": [
    "get_recommendations_response = personalize_runtime.get_recommendations(\n",
    "    campaignArn = campaign_arn,\n",
    "    userId = str(user_id),\n",
    "    itemId = str(item_id)\n",
    ")\n",
    "\n",
    "item_list = get_recommendations_response['itemList']\n",
    "title_list = [items.loc[items['ITEM_ID'] == np.int(item['itemId'])].values[0][-1] for item in item_list]\n",
    "\n",
    "print(\"Recommendations: {}\".format(json.dumps(title_list, indent=2)))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}