{ "cells": [ { "cell_type": "markdown", "id": "c0bb1151", "metadata": { "id": "1c2a3654-8b51-4150-b16f-9a3773e26390" }, "source": [ "# Build multiclass classifiers with Amazon SageMaker linear learner to predict Risk Index per Region" ] }, { "cell_type": "code", "execution_count": 1, "id": "e4b2b716", "metadata": { "id": "0efeaf67-910c-4372-a0b0-6b37b9d24070" }, "outputs": [], "source": [ "import time\n", "st = time.time()" ] }, { "cell_type": "code", "execution_count": 2, "id": "f7db86d8", "metadata": { "id": "016491122aff46b785a056cdabb1488b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: sagemaker in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (2.112.2)\n", "Requirement already satisfied: pandas in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (1.3.4)\n", "Requirement already satisfied: schema in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (0.7.5)\n", "Requirement already satisfied: pathos in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (0.2.9)\n", "Requirement already satisfied: importlib-metadata<5.0,>=1.4.0 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (4.8.2)\n", "Requirement already satisfied: numpy<2.0,>=1.9.0 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (1.20.3)\n", "Requirement already satisfied: protobuf3-to-dict<1.0,>=0.1.5 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (0.1.5)\n", "Requirement already satisfied: attrs<23,>=20.3.0 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (21.2.0)\n", "Requirement already satisfied: smdebug-rulesconfig==1.0.1 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (1.0.1)\n", "Requirement already satisfied: google-pasta in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (0.2.0)\n", "Requirement already satisfied: 
packaging>=20.0 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (21.3)\n", "Requirement already satisfied: protobuf<4.0,>=3.1 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (3.19.1)\n", "Requirement already satisfied: boto3<2.0,>=1.20.21 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from sagemaker) (1.24.89)\n", "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from boto3<2.0,>=1.20.21->sagemaker) (0.10.0)\n", "Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from boto3<2.0,>=1.20.21->sagemaker) (0.6.0)\n", "Requirement already satisfied: botocore<1.28.0,>=1.27.89 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from boto3<2.0,>=1.20.21->sagemaker) (1.27.89)\n", "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from botocore<1.28.0,>=1.27.89->boto3<2.0,>=1.20.21->sagemaker) (2.8.2)\n", "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from botocore<1.28.0,>=1.27.89->boto3<2.0,>=1.20.21->sagemaker) (1.26.7)\n", "Requirement already satisfied: zipp>=0.5 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from importlib-metadata<5.0,>=1.4.0->sagemaker) (3.6.0)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from packaging>=20.0->sagemaker) (3.0.4)\n", "Requirement already satisfied: six in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from protobuf3-to-dict<1.0,>=0.1.5->sagemaker) (1.15.0)\n", "Requirement already satisfied: pytz>=2017.3 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from pandas->sagemaker) (2021.3)\n", "Requirement already satisfied: dill>=0.3.5.1 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from 
pathos->sagemaker) (0.3.5.1)\n", "Requirement already satisfied: ppft>=1.7.6.5 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from pathos->sagemaker) (1.7.6.5)\n", "Requirement already satisfied: multiprocess>=0.70.13 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from pathos->sagemaker) (0.70.13)\n", "Requirement already satisfied: pox>=0.3.1 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from pathos->sagemaker) (0.3.1)\n", "Requirement already satisfied: contextlib2>=0.5.5 in /opt/conda/envs/Python-3.9/lib/python3.9/site-packages (from schema->sagemaker) (21.6.0)\n" ] } ], "source": [ "!pip install sagemaker" ] }, { "cell_type": "markdown", "id": "e5e47ef7", "metadata": { "id": "9be6fde4-27a3-4df0-ac29-80e694481bda" }, "source": [ "### Import the libraries into the notebook" ] }, { "cell_type": "code", "execution_count": 3, "id": "f007c6c2", "metadata": { "id": "ab4e3b14-063c-41db-81d5-173baf52d73c" }, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np \n", "import pandas as pd \n", "import matplotlib.pyplot as plt\n", "import ipaddress\n", "from sklearn import preprocessing\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "import time\n", "import boto3\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import train_test_split\n", "from sklearn import preprocessing\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn import metrics\n", "import seaborn as sns\n", "sns.set(style=\"white\")\n", "sns.set(style=\"whitegrid\", color_codes=True)" ] }, { "cell_type": "markdown", "id": "83bb1656", "metadata": { "id": "60c6ad98-95b7-48bb-8f4b-80b37104f116" }, "source": [ "### Update the credentials of SageMaker Access & Secret Keys" ] }, { "cell_type": "code", "execution_count": 4, "id": 
"7855bd03", "metadata": { "id": "8d3c2e2e-92dc-488e-af20-1d7d38c22766" }, "outputs": [], "source": [ "import os\n", "\n", "S3_BUCKET_REGION = \"us-east-2\"\n", "# Read the keys from environment variables so secrets are never saved inside the notebook file.\n", "# When a variable is unset the value falls back to the empty-string placeholder.\n", "SAGEMAKER_ACCESS_KEY_ID = os.environ.get(\"SAGEMAKER_ACCESS_KEY_ID\", \"\")\n", "SAGEMAKER_SECRET_ACCESS_KEY = os.environ.get(\"SAGEMAKER_SECRET_ACCESS_KEY\", \"\")" ] }, { "cell_type": "markdown", "id": "5c6544f6", "metadata": { "id": "01cecc0f-b6fe-4664-9f77-e4498b0020ae" }, "source": [ "### Update the SageMaker Full Access role in the below cell" ] }, { "cell_type": "code", "execution_count": 5, "id": "45a4146b", "metadata": { "id": "f66bd626-932f-4c44-b132-b29bc5150573" }, "outputs": [], "source": [ "role=''" ] }, { "cell_type": "markdown", "id": "88ac8bdd", "metadata": { "id": "4551c425-aea9-4002-8745-dc0e39fcb12c" }, "source": [ "### Read the Risk Index per Region data as dataframe and display five records\n", "We will ingest the data files which were pre-processed in the earlier steps using the Data pre-processing Notebook" ] }, { "cell_type": "code", "execution_count": 6, "id": "a32c912f", "metadata": { "id": "aa5f1912-8e3b-4d52-a493-8c674b33bf56" }, "outputs": [ { "data": { "text/html": [ "
\n", " | DATE | \n", "REGION | \n", "Total_cases | \n", "Risk_Index | \n", "
---|---|---|---|---|
0 | \n", "2020-03-15 | \n", "Brussels | \n", "119 | \n", "0 | \n", "
1 | \n", "2020-03-15 | \n", "Flanders | \n", "461 | \n", "2 | \n", "
2 | \n", "2020-03-15 | \n", "Wallonia | \n", "383 | \n", "1 | \n", "
3 | \n", "2020-03-16 | \n", "Brussels | \n", "238 | \n", "0 | \n", "
4 | \n", "2020-03-16 | \n", "Flanders | \n", "794 | \n", "2 | \n", "
\n", " | REGION | \n", "Total_cases | \n", "Risk_Index | \n", "
---|---|---|---|
0 | \n", "Brussels | \n", "119 | \n", "0 | \n", "
1 | \n", "Flanders | \n", "461 | \n", "2 | \n", "
2 | \n", "Wallonia | \n", "383 | \n", "1 | \n", "
3 | \n", "Brussels | \n", "238 | \n", "0 | \n", "
4 | \n", "Flanders | \n", "794 | \n", "2 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
2215 | \n", "Brussels | \n", "677 | \n", "0 | \n", "
2216 | \n", "Flanders | \n", "7909 | \n", "2 | \n", "
2217 | \n", "Wallonia | \n", "1371 | \n", "1 | \n", "
2218 | \n", "Flanders | \n", "295 | \n", "2 | \n", "
2219 | \n", "Wallonia | \n", "33 | \n", "0 | \n", "
2220 rows × 3 columns
\n", "\n", " | REGION | \n", "Total_cases | \n", "Risk_Index | \n", "
---|---|---|---|
0 | \n", "0 | \n", "119 | \n", "0 | \n", "
1 | \n", "1 | \n", "461 | \n", "2 | \n", "
2 | \n", "2 | \n", "383 | \n", "1 | \n", "
3 | \n", "0 | \n", "238 | \n", "0 | \n", "
4 | \n", "1 | \n", "794 | \n", "2 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
2215 | \n", "0 | \n", "677 | \n", "0 | \n", "
2216 | \n", "1 | \n", "7909 | \n", "2 | \n", "
2217 | \n", "2 | \n", "1371 | \n", "1 | \n", "
2218 | \n", "1 | \n", "295 | \n", "2 | \n", "
2219 | \n", "2 | \n", "33 | \n", "0 | \n", "
2220 rows × 3 columns
\n", "