{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Import Libraries and Create the boto3 Clients" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import boto3\n", "\n", "import json\n", "import numpy as np\n", "import pandas as pd\n", "import time\n", "\n", "personalize = boto3.client('personalize')\n", "personalize_runtime = boto3.client('personalize-runtime')\n", "iam = boto3.client(\"iam\")\n", "s3 = boto3.client(\"s3\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Specify a Bucket and Data Output Location" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "bucket = \"personalize-demo\" # replace with the name of your S3 bucket\n", "filename = \"movie-lens-100k.csv\" # replace with a name that you want to save the dataset under" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Download, Prepare, and Upload Training Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Download and Explore the Dataset" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2020-05-05 09:38:29-- http://files.grouplens.org/datasets/movielens/ml-100k.zip\n", "Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152\n", "Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.\n", "HTTP request sent, awaiting response... 304 Not Modified\n", "File ‘ml-100k.zip’ not modified on server. 
Omitting download.\n", "\n", "Archive: ml-100k.zip\n", " inflating: ml-100k/allbut.pl \n", " inflating: ml-100k/mku.sh \n", " inflating: ml-100k/README \n", " inflating: ml-100k/u.data \n", " inflating: ml-100k/u.genre \n", " inflating: ml-100k/u.info \n", " inflating: ml-100k/u.item \n", " inflating: ml-100k/u.occupation \n", " inflating: ml-100k/u.user \n", " inflating: ml-100k/u1.base \n", " inflating: ml-100k/u1.test \n", " inflating: ml-100k/u2.base \n", " inflating: ml-100k/u2.test \n", " inflating: ml-100k/u3.base \n", " inflating: ml-100k/u3.test \n", " inflating: ml-100k/u4.base \n", " inflating: ml-100k/u4.test \n", " inflating: ml-100k/u5.base \n", " inflating: ml-100k/u5.test \n", " inflating: ml-100k/ua.base \n", " inflating: ml-100k/ua.test \n", " inflating: ml-100k/ub.base \n", " inflating: ml-100k/ub.test \n" ] }, { "data": { "text/html": [ "
\n", " | USER_ID | \n", "ITEM_ID | \n", "RATING | \n", "TIMESTAMP | \n", "
---|---|---|---|---|
0 | \n", "196 | \n", "242 | \n", "3 | \n", "881250949 | \n", "
1 | \n", "186 | \n", "302 | \n", "3 | \n", "891717742 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
99998 | \n", "13 | \n", "225 | \n", "2 | \n", "882399156 | \n", "
99999 | \n", "12 | \n", "203 | \n", "3 | \n", "879959583 | \n", "
100000 rows × 4 columns
\n", "\n", " | ITEM_ID | \n", "TITLE | \n", "
---|---|---|
0 | \n", "1 | \n", "Toy Story (1995) | \n", "
1 | \n", "2 | \n", "GoldenEye (1995) | \n", "
... | \n", "... | \n", "... | \n", "
1680 | \n", "1681 | \n", "You So Crazy (1994) | \n", "
1681 | \n", "1682 | \n", "Scream of Stone (Schrei aus Stein) (1991) | \n", "
1682 rows × 2 columns
\n", "