{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Time series forecasting with DeepAR - Telecom data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Introduction" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Time series forecasasting with DeepAR is a supervised learning algorithm for forecasting scalar time series with Telecom data. This notebook demonstrates how to prepare a dataset of time series for training DeepAR with telecom Call Detail Record(CDR) data, classify Call Disconnect Reason and how to use the trained model for inference. The notebook uses a hybrid approach of Spark ML Random Forest Classifier and DeepAR.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%config IPCompleter.greedy=True" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This demonstrates the use of sparkml RandomForestClassifier for classification and feeds as input to DeepAR for Time series Prediction" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Start_Time_HH_MM_SS_s_index', 'Called_Number_index', 'Call_Service_Duration_index', 'Accounting_ID_index', 'Calling_Number_index', 'Start_Time_MM_DD_YYYY_index']\n" ] } ], "source": [ "from pyspark.sql.types import *\n", "from pyspark.sql import SparkSession\n", "from sagemaker import get_execution_role\n", "import sagemaker_pyspark\n", "import pandas as pd\n", "import numpy as np\n", "\n", "role = get_execution_role()\n", "\n", "# Configure Spark to use the SageMaker Spark dependency jars\n", "jars = sagemaker_pyspark.classpath_jars()\n", "\n", "classpath = \":\".join(sagemaker_pyspark.classpath_jars())\n", "\n", "spark = SparkSession.builder.config(\"spark.driver.extraClassPath\", classpath)\\\n", " .master(\"local[*]\").getOrCreate()\n", "\n", "def getCdrDataframe():\n", " cdr_start_loc = \"<%CDRStartFile%>\"\n", " cdr_stop_loc = \"<%CDRStopFile%>\"\n", " cdr_start_sample_loc = \"<%CDRStartSampleFile%>\"\n", " cdr_stop_sample_loc = \"<%CDRStopSampleFile%>\"\n", " \n", " df = spark.read.format(\"s3select\").parquet(cdr_stop_sample_loc)\n", " df.createOrReplaceTempView(\"cdr\")\n", " return df\n", "\n", "getCdrDataframe()\n", "\n", "def build_schema():\n", " \"\"\"Build and return a schema to use for the sample data.\"\"\"\n", " schema = StructType(\n", " [\n", " StructField(\"Accounting_ID\", StringType(), True),\n", " StructField(\"Start_Time_MM_DD_YYYY\", StringType(), True),\n", " StructField(\"Start_Time_HH_MM_SS_s\", StringType(), True),\n", " StructField(\"Call_Service_Duration\", StringType(), True),\n", " StructField(\"Call_Disconnect_Reason\", StringType(), True),\n", " StructField(\"Calling_Number\", StringType(), True),\n", " StructField(\"Called_Number\", StringType(), True)\n", " ]\n", " )\n", " return schema\n", "\n", "import matplotlib.pyplot as plt\n", "dataDF = spark.sql(\"SELECT _c2,_c5,_c6,_c13,_c14,_c19,_c20 from cdr where _c0 = 'STOP'\")\n", "dataPanda = dataDF.toPandas()\n", "newDataDF = spark.createDataFrame(dataPanda.dropna(),build_schema())\n", "dataPd = newDataDF.toPandas()\n", "\n", "integerColumns = [\"Call_Service_Duration\" , \"Call_Disconnect_Reason\", \"Calling_Number\", \"Called_Number\"]\n", "for col in integerColumns:\n", " dataPd[col] = dataPd[col].astype(int)\n", " \n", "#Mock Data\n", "def mock_data():\n", " from pyspark.sql.functions import rand,when\n", " addDF = newDataDF\n", " unionDF = addDF.union(newDataDF)\n", " df = unionDF.drop('Call_Disconnect_Reason') \n", " df1 = df.withColumn('Call_Disconnect_Reason', when(rand(seed=1234) > 0.5, 16).otherwise(17)) \n", " return df1\n", "\n", "df1 = mock_data() \n", " \n", " \n", "from pyspark.sql.functions import rand\n", "\n", "trainingFraction = 0.75; testingFraction = (1-trainingFraction);\n", "seed = 1234;\n", "trainData, testData = df1.randomSplit([trainingFraction, testingFraction], seed=seed);\n", "\n", "# # CACHE TRAIN AND TEST DATA\n", "trainData.cache()\n", "testData.cache()\n", "trainData.count(),testData.count()\n", "\n", "from pyspark.ml.feature import StringIndexer\n", "columns_list = list(set(newDataDF.columns)-set(['Call_Disconnect_Reason']) ) \n", "indexers = []\n", "for column in columns_list:\n", " indexer = StringIndexer(inputCol=column, outputCol=column+\"_index\")\n", " indexer.setHandleInvalid(\"skip\")\n", " indexers.append(indexer)\n", "\n", "from pyspark.ml.feature import StringIndexer\n", "# Convert target into numerical categories\n", "labelIndexer = StringIndexer(inputCol=\"Call_Disconnect_Reason\", outputCol=\"label\")\n", "labelIndexer.setHandleInvalid(\"skip\")\n", " \n", "from pyspark.ml.feature import VectorAssembler\n", "from array import array\n", "\n", "inputcolsIndexer = []\n", "for col in columns_list:\n", " inputcolsIndexer.append(col+\"_index\")\n", "print(inputcolsIndexer)\n", "\n", "vecAssembler = VectorAssembler(inputCols=inputcolsIndexer, outputCol=\"features\")\n", "\n", "from pyspark.ml.classification import RandomForestClassifier\n", "from pyspark.ml.evaluation import MulticlassClassificationEvaluator\n", "\n", "# Train a RandomForest model.\n", "rf = RandomForestClassifier(labelCol=\"label\", featuresCol=\"features\", maxDepth=8, maxBins=2400000, numTrees=128,impurity=\"gini\")\n", "\n", "from pyspark.ml.feature import ChiSqSelector\n", "chisqSelector = ChiSqSelector(numTopFeatures=3, featuresCol=\"features\",\n", " outputCol=\"selectedFeatures\", labelCol=\"label\")\n", "\n", "from pyspark.ml import Pipeline\n", "stages = []\n", "stages += indexers \n", "stages += [labelIndexer]\n", "stages += [vecAssembler]\n", "stages += [rf]\n", "stages += [chisqSelector]\n", "\n", "pipeline = Pipeline(stages=stages)\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(33607, 11219)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainData.count(),testData.count()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3403, 4487)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_sdata = trainData.sample(False,0.1)\n", "test_sdata = testData.sample(False,0.4)\n", "train_sdata.count(),test_sdata.count()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", "Wall time: 8.34 µs\n" ] }, { "data": { "text/plain": [ "356" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%time\n", "model = pipeline.fit(train_sdata)\n", "predictions = model.transform(test_sdata)\n", "predictions.createOrReplaceTempView(\"predicted_table\")\n", "predictions.count()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+------------------+---------------------+---------------------+---------------------+--------------+-------------+----------------------+---------------------------+-------------------+---------------------------+-------------------+--------------------+---------------------------+-----+--------------------+--------------------+--------------------+----------+----------------+\n", "| Accounting_ID|Start_Time_MM_DD_YYYY|Start_Time_HH_MM_SS_s|Call_Service_Duration|Calling_Number|Called_Number|Call_Disconnect_Reason|Start_Time_HH_MM_SS_s_index|Called_Number_index|Call_Service_Duration_index|Accounting_ID_index|Calling_Number_index|Start_Time_MM_DD_YYYY_index|label| features| rawPrediction| probability|prediction|selectedFeatures|\n", "+------------------+---------------------+---------------------+---------------------+--------------+-------------+----------------------+---------------------------+-------------------+---------------------------+-------------------+--------------------+---------------------------+-----+--------------------+--------------------+--------------------+----------+----------------+\n", "|0x00016E0F11780902| 08/10/2018| 12:57:43.1| 5| 9645000099| 3512000099| 16| 1339.0| 8.0| 277.0| 2840.0| 8.0| 0.0| 1.0|[1339.0,8.0,277.0...|[0.47079490632979...|[0.00367808520570...| 1.0| [8.0,277.0,8.0]|\n", "|0x00016E0F1240F35C| 08/10/2018| 12:49:03.1| 135| 9645000072| 3512000072| 16| 38.0| 95.0| 9.0| 2958.0| 93.0| 0.0| 1.0|[38.0,95.0,9.0,29...|[81.7280438800052...|[0.63850034281254...| 0.0| [95.0,9.0,93.0]|\n", "+------------------+---------------------+---------------------+---------------------+--------------+-------------+----------------------+---------------------------+-------------------+---------------------------+-------------------+--------------------+---------------------------+-----+--------------------+--------------------+--------------------+----------+----------------+\n", "only showing top 2 rows\n", "\n" ] } ], "source": [ "predictions.show(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- _Call Disconnect Reason prediction count is computed to classify Normal Call Clearing(16) records and non Normal Call Clearing records as anomalous._" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "pred_sql = spark.sql(\"Select Start_Time_MM_DD_YYYY,Start_Time_HH_MM_SS_s,Call_Disconnect_Reason,prediction, CASE WHEN Call_Disconnect_Reason = 16 AND prediction = 0.0 THEN 0 ELSE 1 END AS anomaly from predicted_table\")\n", "dft = pred_sql.toPandas()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Start_Time_MM_DD_YYYYStart_Time_HH_MM_SS_sCall_Disconnect_Reasonpredictionanomaly
Date
08/10/2018 11:37:28.108/10/201811:37:28.1170.01
08/10/2018 11:37:30.108/10/201811:37:30.1161.01
08/10/2018 11:37:37.108/10/201811:37:37.1170.01
08/10/2018 11:38:00.108/10/201811:38:00.1170.01
08/10/2018 11:38:33.108/10/201811:38:33.1161.01
08/10/2018 11:39:33.108/10/201811:39:33.1161.01
08/10/2018 11:39:43.108/10/201811:39:43.1170.01
08/10/2018 11:39:48.108/10/201811:39:48.1160.00
08/10/2018 11:41:13.108/10/201811:41:13.1171.01
08/10/2018 11:42:17.108/10/201811:42:17.1161.01
08/10/2018 11:42:21.108/10/201811:42:21.1171.01
08/10/2018 11:42:32.108/10/201811:42:32.1160.00
08/10/2018 11:42:38.108/10/201811:42:38.1160.00
08/10/2018 11:43:10.108/10/201811:43:10.1160.00
08/10/2018 11:43:20.108/10/201811:43:20.1170.01
08/10/2018 11:43:38.108/10/201811:43:38.1160.00
08/10/2018 11:44:35.108/10/201811:44:35.1160.00
08/10/2018 11:45:05.108/10/201811:45:05.1171.01
08/10/2018 11:45:13.108/10/201811:45:13.1160.00
08/10/2018 11:45:33.108/10/201811:45:33.1160.00
08/10/2018 11:46:59.108/10/201811:46:59.1171.01
08/10/2018 11:48:55.108/10/201811:48:55.1161.01
08/10/2018 11:48:56.108/10/201811:48:56.1170.01
08/10/2018 11:49:00.108/10/201811:49:00.1170.01
08/10/2018 11:49:07.108/10/201811:49:07.1170.01
08/10/2018 11:49:08.108/10/201811:49:08.1160.00
08/10/2018 11:49:16.108/10/201811:49:16.1160.00
08/10/2018 11:50:13.108/10/201811:50:13.1170.01
08/10/2018 11:50:22.108/10/201811:50:22.1161.01
08/10/2018 11:50:35.108/10/201811:50:35.1170.01
..................
08/10/2018 13:54:28.108/10/201813:54:28.1160.00
08/10/2018 13:54:33.108/10/201813:54:33.1160.00
08/10/2018 13:55:19.108/10/201813:55:19.1161.01
08/10/2018 13:56:27.108/10/201813:56:27.1170.01
08/10/2018 13:56:36.108/10/201813:56:36.1170.01
08/10/2018 13:58:12.108/10/201813:58:12.1161.01
08/10/2018 13:58:43.108/10/201813:58:43.1171.01
08/10/2018 13:58:54.108/10/201813:58:54.1171.01
08/10/2018 13:59:02.108/10/201813:59:02.1161.01
08/10/2018 13:59:40.108/10/201813:59:40.1160.00
08/10/2018 13:59:46.108/10/201813:59:46.1160.00
08/10/2018 13:59:47.108/10/201813:59:47.1161.01
08/10/2018 14:00:12.108/10/201814:00:12.1161.01
08/10/2018 14:00:32.108/10/201814:00:32.1161.01
08/10/2018 14:00:38.108/10/201814:00:38.1171.01
08/10/2018 14:01:12.108/10/201814:01:12.1160.00
08/10/2018 14:01:40.108/10/201814:01:40.1160.00
08/10/2018 14:02:04.108/10/201814:02:04.1171.01
08/10/2018 14:02:07.108/10/201814:02:07.1171.01
08/10/2018 14:02:19.108/10/201814:02:19.1171.01
08/10/2018 14:03:02.108/10/201814:03:02.1161.01
08/10/2018 14:03:38.108/10/201814:03:38.1171.01
08/10/2018 14:04:18.108/10/201814:04:18.1171.01
08/10/2018 14:04:36.108/10/201814:04:36.1171.01
08/10/2018 14:04:46.108/10/201814:04:46.1161.01
08/10/2018 14:04:56.108/10/201814:04:56.1171.01
08/10/2018 14:05:34.108/10/201814:05:34.1171.01
08/10/2018 14:06:03.108/10/201814:06:03.1161.01
08/10/2018 14:06:43.108/10/201814:06:43.1161.01
08/10/2018 14:06:53.108/10/201814:06:53.1160.00
\n", "

356 rows × 5 columns

\n", "
" ], "text/plain": [ " Start_Time_MM_DD_YYYY Start_Time_HH_MM_SS_s \\\n", "Date \n", "08/10/2018 11:37:28.1 08/10/2018 11:37:28.1 \n", "08/10/2018 11:37:30.1 08/10/2018 11:37:30.1 \n", "08/10/2018 11:37:37.1 08/10/2018 11:37:37.1 \n", "08/10/2018 11:38:00.1 08/10/2018 11:38:00.1 \n", "08/10/2018 11:38:33.1 08/10/2018 11:38:33.1 \n", "08/10/2018 11:39:33.1 08/10/2018 11:39:33.1 \n", "08/10/2018 11:39:43.1 08/10/2018 11:39:43.1 \n", "08/10/2018 11:39:48.1 08/10/2018 11:39:48.1 \n", "08/10/2018 11:41:13.1 08/10/2018 11:41:13.1 \n", "08/10/2018 11:42:17.1 08/10/2018 11:42:17.1 \n", "08/10/2018 11:42:21.1 08/10/2018 11:42:21.1 \n", "08/10/2018 11:42:32.1 08/10/2018 11:42:32.1 \n", "08/10/2018 11:42:38.1 08/10/2018 11:42:38.1 \n", "08/10/2018 11:43:10.1 08/10/2018 11:43:10.1 \n", "08/10/2018 11:43:20.1 08/10/2018 11:43:20.1 \n", "08/10/2018 11:43:38.1 08/10/2018 11:43:38.1 \n", "08/10/2018 11:44:35.1 08/10/2018 11:44:35.1 \n", "08/10/2018 11:45:05.1 08/10/2018 11:45:05.1 \n", "08/10/2018 11:45:13.1 08/10/2018 11:45:13.1 \n", "08/10/2018 11:45:33.1 08/10/2018 11:45:33.1 \n", "08/10/2018 11:46:59.1 08/10/2018 11:46:59.1 \n", "08/10/2018 11:48:55.1 08/10/2018 11:48:55.1 \n", "08/10/2018 11:48:56.1 08/10/2018 11:48:56.1 \n", "08/10/2018 11:49:00.1 08/10/2018 11:49:00.1 \n", "08/10/2018 11:49:07.1 08/10/2018 11:49:07.1 \n", "08/10/2018 11:49:08.1 08/10/2018 11:49:08.1 \n", "08/10/2018 11:49:16.1 08/10/2018 11:49:16.1 \n", "08/10/2018 11:50:13.1 08/10/2018 11:50:13.1 \n", "08/10/2018 11:50:22.1 08/10/2018 11:50:22.1 \n", "08/10/2018 11:50:35.1 08/10/2018 11:50:35.1 \n", "... ... ... \n", "08/10/2018 13:54:28.1 08/10/2018 13:54:28.1 \n", "08/10/2018 13:54:33.1 08/10/2018 13:54:33.1 \n", "08/10/2018 13:55:19.1 08/10/2018 13:55:19.1 \n", "08/10/2018 13:56:27.1 08/10/2018 13:56:27.1 \n", "08/10/2018 13:56:36.1 08/10/2018 13:56:36.1 \n", "08/10/2018 13:58:12.1 08/10/2018 13:58:12.1 \n", "08/10/2018 13:58:43.1 08/10/2018 13:58:43.1 \n", "08/10/2018 13:58:54.1 08/10/2018 13:58:54.1 \n", "08/10/2018 13:59:02.1 08/10/2018 13:59:02.1 \n", "08/10/2018 13:59:40.1 08/10/2018 13:59:40.1 \n", "08/10/2018 13:59:46.1 08/10/2018 13:59:46.1 \n", "08/10/2018 13:59:47.1 08/10/2018 13:59:47.1 \n", "08/10/2018 14:00:12.1 08/10/2018 14:00:12.1 \n", "08/10/2018 14:00:32.1 08/10/2018 14:00:32.1 \n", "08/10/2018 14:00:38.1 08/10/2018 14:00:38.1 \n", "08/10/2018 14:01:12.1 08/10/2018 14:01:12.1 \n", "08/10/2018 14:01:40.1 08/10/2018 14:01:40.1 \n", "08/10/2018 14:02:04.1 08/10/2018 14:02:04.1 \n", "08/10/2018 14:02:07.1 08/10/2018 14:02:07.1 \n", "08/10/2018 14:02:19.1 08/10/2018 14:02:19.1 \n", "08/10/2018 14:03:02.1 08/10/2018 14:03:02.1 \n", "08/10/2018 14:03:38.1 08/10/2018 14:03:38.1 \n", "08/10/2018 14:04:18.1 08/10/2018 14:04:18.1 \n", "08/10/2018 14:04:36.1 08/10/2018 14:04:36.1 \n", "08/10/2018 14:04:46.1 08/10/2018 14:04:46.1 \n", "08/10/2018 14:04:56.1 08/10/2018 14:04:56.1 \n", "08/10/2018 14:05:34.1 08/10/2018 14:05:34.1 \n", "08/10/2018 14:06:03.1 08/10/2018 14:06:03.1 \n", "08/10/2018 14:06:43.1 08/10/2018 14:06:43.1 \n", "08/10/2018 14:06:53.1 08/10/2018 14:06:53.1 \n", "\n", " Call_Disconnect_Reason prediction anomaly \n", "Date \n", "08/10/2018 11:37:28.1 17 0.0 1 \n", "08/10/2018 11:37:30.1 16 1.0 1 \n", "08/10/2018 11:37:37.1 17 0.0 1 \n", "08/10/2018 11:38:00.1 17 0.0 1 \n", "08/10/2018 11:38:33.1 16 1.0 1 \n", "08/10/2018 11:39:33.1 16 1.0 1 \n", "08/10/2018 11:39:43.1 17 0.0 1 \n", "08/10/2018 11:39:48.1 16 0.0 0 \n", "08/10/2018 11:41:13.1 17 1.0 1 \n", "08/10/2018 11:42:17.1 16 1.0 1 \n", "08/10/2018 11:42:21.1 17 1.0 1 \n", "08/10/2018 11:42:32.1 16 0.0 0 \n", "08/10/2018 11:42:38.1 16 0.0 0 \n", "08/10/2018 11:43:10.1 16 0.0 0 \n", "08/10/2018 11:43:20.1 17 0.0 1 \n", "08/10/2018 11:43:38.1 16 0.0 0 \n", "08/10/2018 11:44:35.1 16 0.0 0 \n", "08/10/2018 11:45:05.1 17 1.0 1 \n", "08/10/2018 11:45:13.1 16 0.0 0 \n", "08/10/2018 11:45:33.1 16 0.0 0 \n", "08/10/2018 11:46:59.1 17 1.0 1 \n", "08/10/2018 11:48:55.1 16 1.0 1 \n", "08/10/2018 11:48:56.1 17 0.0 1 \n", "08/10/2018 11:49:00.1 17 0.0 1 \n", "08/10/2018 11:49:07.1 17 0.0 1 \n", "08/10/2018 11:49:08.1 16 0.0 0 \n", "08/10/2018 11:49:16.1 16 0.0 0 \n", "08/10/2018 11:50:13.1 17 0.0 1 \n", "08/10/2018 11:50:22.1 16 1.0 1 \n", "08/10/2018 11:50:35.1 17 0.0 1 \n", "... ... ... ... \n", "08/10/2018 13:54:28.1 16 0.0 0 \n", "08/10/2018 13:54:33.1 16 0.0 0 \n", "08/10/2018 13:55:19.1 16 1.0 1 \n", "08/10/2018 13:56:27.1 17 0.0 1 \n", "08/10/2018 13:56:36.1 17 0.0 1 \n", "08/10/2018 13:58:12.1 16 1.0 1 \n", "08/10/2018 13:58:43.1 17 1.0 1 \n", "08/10/2018 13:58:54.1 17 1.0 1 \n", "08/10/2018 13:59:02.1 16 1.0 1 \n", "08/10/2018 13:59:40.1 16 0.0 0 \n", "08/10/2018 13:59:46.1 16 0.0 0 \n", "08/10/2018 13:59:47.1 16 1.0 1 \n", "08/10/2018 14:00:12.1 16 1.0 1 \n", "08/10/2018 14:00:32.1 16 1.0 1 \n", "08/10/2018 14:00:38.1 17 1.0 1 \n", "08/10/2018 14:01:12.1 16 0.0 0 \n", "08/10/2018 14:01:40.1 16 0.0 0 \n", "08/10/2018 14:02:04.1 17 1.0 1 \n", "08/10/2018 14:02:07.1 17 1.0 1 \n", "08/10/2018 14:02:19.1 17 1.0 1 \n", "08/10/2018 14:03:02.1 16 1.0 1 \n", "08/10/2018 14:03:38.1 17 1.0 1 \n", "08/10/2018 14:04:18.1 17 1.0 1 \n", "08/10/2018 14:04:36.1 17 1.0 1 \n", "08/10/2018 14:04:46.1 16 1.0 1 \n", "08/10/2018 14:04:56.1 17 1.0 1 \n", "08/10/2018 14:05:34.1 17 1.0 1 \n", "08/10/2018 14:06:03.1 16 1.0 1 \n", "08/10/2018 14:06:43.1 16 1.0 1 \n", "08/10/2018 14:06:53.1 16 0.0 0 \n", "\n", "[356 rows x 5 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "#dataframetime\n", "dft['Date']=dft['Start_Time_MM_DD_YYYY'].astype(str)+' '+dft['Start_Time_HH_MM_SS_s']\n", "dft['Date'] = pd.to_datetime(dft['Date'], format='%m/%d/%Y %H:%M:%S.%f', errors = 'coerce')\n", "dft = dft.sort_values(by=['Date'])\n", "dft.set_index('Date', inplace=True)\n", "dft" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Start_Time_MM_DD_YYYYStart_Time_HH_MM_SS_sCall_Disconnect_Reasonpredictionanomaly
Date
2018-08-10 11:37:0008/10/201811:37:28.1170.01
2018-08-10 11:38:0008/10/201811:37:30.1161.01
2018-08-10 11:38:0008/10/201811:37:37.1170.01
2018-08-10 11:38:0008/10/201811:38:00.1170.01
2018-08-10 11:39:0008/10/201811:38:33.1161.01
2018-08-10 11:40:0008/10/201811:39:33.1161.01
2018-08-10 11:40:0008/10/201811:39:43.1170.01
2018-08-10 11:40:0008/10/201811:39:48.1160.00
2018-08-10 11:41:0008/10/201811:41:13.1171.01
2018-08-10 11:42:0008/10/201811:42:17.1161.01
\n", "
" ], "text/plain": [ " Start_Time_MM_DD_YYYY Start_Time_HH_MM_SS_s \\\n", "Date \n", "2018-08-10 11:37:00 08/10/2018 11:37:28.1 \n", "2018-08-10 11:38:00 08/10/2018 11:37:30.1 \n", "2018-08-10 11:38:00 08/10/2018 11:37:37.1 \n", "2018-08-10 11:38:00 08/10/2018 11:38:00.1 \n", "2018-08-10 11:39:00 08/10/2018 11:38:33.1 \n", "2018-08-10 11:40:00 08/10/2018 11:39:33.1 \n", "2018-08-10 11:40:00 08/10/2018 11:39:43.1 \n", "2018-08-10 11:40:00 08/10/2018 11:39:48.1 \n", "2018-08-10 11:41:00 08/10/2018 11:41:13.1 \n", "2018-08-10 11:42:00 08/10/2018 11:42:17.1 \n", "\n", " Call_Disconnect_Reason prediction anomaly \n", "Date \n", "2018-08-10 11:37:00 17 0.0 1 \n", "2018-08-10 11:38:00 16 1.0 1 \n", "2018-08-10 11:38:00 17 0.0 1 \n", "2018-08-10 11:38:00 17 0.0 1 \n", "2018-08-10 11:39:00 16 1.0 1 \n", "2018-08-10 11:40:00 16 1.0 1 \n", "2018-08-10 11:40:00 17 0.0 1 \n", "2018-08-10 11:40:00 16 0.0 0 \n", "2018-08-10 11:41:00 17 1.0 1 \n", "2018-08-10 11:42:00 16 1.0 1 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dft.index = pd.to_datetime(dft.index)\n", "dfn = dft\n", "#dataframeindex\n", "dfn.index = dfn.index.round('min')\n", "dfn.head(10)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ts = dfn\n", "ts.plot()\n", "plt.figure(figsize=(10,10))\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "time_series = []\n", "data = np.array(dfn['anomaly'])\n", "freq = '1min'\n", "idx = dfn.index\n", "# Note: Setting dataframe index frequency to 1 minute requires passed values to conform to one minute\n", "# frequency. For reference see also DataFrame.asfreq() and DataFrame.drop_duplicates()\n", "idx.freq = pd.tseries.frequencies.to_offset(freq)\n", "time_series.append(pd.Series(data=data, index=idx))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Times Series Plot" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "time_series[0].plot()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Start_Time_MM_DD_YYYYStart_Time_HH_MM_SS_sCall_Disconnect_Reasonpredictionanomaly
Date
2018-08-10 11:37:0008/10/201811:37:28.1170.01
2018-08-10 11:38:0008/10/201811:37:30.1161.01
2018-08-10 11:38:0008/10/201811:37:37.1170.01
2018-08-10 11:38:0008/10/201811:38:00.1170.01
2018-08-10 11:39:0008/10/201811:38:33.1161.01
2018-08-10 11:40:0008/10/201811:39:33.1161.01
2018-08-10 11:40:0008/10/201811:39:43.1170.01
2018-08-10 11:40:0008/10/201811:39:48.1160.00
2018-08-10 11:41:0008/10/201811:41:13.1171.01
2018-08-10 11:42:0008/10/201811:42:17.1161.01
2018-08-10 11:42:0008/10/201811:42:21.1171.01
2018-08-10 11:43:0008/10/201811:42:32.1160.00
2018-08-10 11:43:0008/10/201811:42:38.1160.00
2018-08-10 11:43:0008/10/201811:43:10.1160.00
2018-08-10 11:43:0008/10/201811:43:20.1170.01
2018-08-10 11:44:0008/10/201811:43:38.1160.00
2018-08-10 11:45:0008/10/201811:44:35.1160.00
2018-08-10 11:45:0008/10/201811:45:05.1171.01
2018-08-10 11:45:0008/10/201811:45:13.1160.00
2018-08-10 11:46:0008/10/201811:45:33.1160.00
2018-08-10 11:47:0008/10/201811:46:59.1171.01
2018-08-10 11:49:0008/10/201811:48:55.1161.01
2018-08-10 11:49:0008/10/201811:48:56.1170.01
2018-08-10 11:49:0008/10/201811:49:00.1170.01
2018-08-10 11:49:0008/10/201811:49:07.1170.01
2018-08-10 11:49:0008/10/201811:49:08.1160.00
2018-08-10 11:49:0008/10/201811:49:16.1160.00
2018-08-10 11:50:0008/10/201811:50:13.1170.01
2018-08-10 11:50:0008/10/201811:50:22.1161.01
2018-08-10 11:51:0008/10/201811:50:35.1170.01
..................
2018-08-10 13:44:0008/10/201813:44:08.1170.01
2018-08-10 13:45:0008/10/201813:44:41.1160.00
2018-08-10 13:45:0008/10/201813:44:59.1160.00
2018-08-10 13:45:0008/10/201813:45:16.1161.01
2018-08-10 13:45:0008/10/201813:45:25.1160.00
2018-08-10 13:46:0008/10/201813:45:31.1170.01
2018-08-10 13:46:0008/10/201813:46:09.1170.01
2018-08-10 13:46:0008/10/201813:46:13.1170.01
2018-08-10 13:48:0008/10/201813:47:59.1170.01
2018-08-10 13:49:0008/10/201813:48:34.1160.00
2018-08-10 13:50:0008/10/201813:49:32.1170.01
2018-08-10 13:50:0008/10/201813:49:40.1170.01
2018-08-10 13:50:0008/10/201813:50:07.1171.01
2018-08-10 13:50:0008/10/201813:50:17.1161.01
2018-08-10 13:51:0008/10/201813:50:30.1170.01
2018-08-10 13:51:0008/10/201813:50:49.1171.01
2018-08-10 13:52:0008/10/201813:51:44.1170.01
2018-08-10 13:52:0008/10/201813:51:50.1170.01
2018-08-10 13:53:0008/10/201813:52:54.1160.00
2018-08-10 13:54:0008/10/201813:53:52.1170.01
2018-08-10 13:54:0008/10/201813:54:28.1160.00
2018-08-10 13:55:0008/10/201813:54:33.1160.00
2018-08-10 13:55:0008/10/201813:55:19.1161.01
2018-08-10 13:56:0008/10/201813:56:27.1170.01
2018-08-10 13:57:0008/10/201813:56:36.1170.01
2018-08-10 13:58:0008/10/201813:58:12.1161.01
2018-08-10 13:59:0008/10/201813:58:43.1171.01
2018-08-10 13:59:0008/10/201813:58:54.1171.01
2018-08-10 13:59:0008/10/201813:59:02.1161.01
2018-08-10 14:00:0008/10/201813:59:40.1160.00
\n", "

336 rows × 5 columns

\n", "
" ], "text/plain": [ " Start_Time_MM_DD_YYYY Start_Time_HH_MM_SS_s \\\n", "Date \n", "2018-08-10 11:37:00 08/10/2018 11:37:28.1 \n", "2018-08-10 11:38:00 08/10/2018 11:37:30.1 \n", "2018-08-10 11:38:00 08/10/2018 11:37:37.1 \n", "2018-08-10 11:38:00 08/10/2018 11:38:00.1 \n", "2018-08-10 11:39:00 08/10/2018 11:38:33.1 \n", "2018-08-10 11:40:00 08/10/2018 11:39:33.1 \n", "2018-08-10 11:40:00 08/10/2018 11:39:43.1 \n", "2018-08-10 11:40:00 08/10/2018 11:39:48.1 \n", "2018-08-10 11:41:00 08/10/2018 11:41:13.1 \n", "2018-08-10 11:42:00 08/10/2018 11:42:17.1 \n", "2018-08-10 11:42:00 08/10/2018 11:42:21.1 \n", "2018-08-10 11:43:00 08/10/2018 11:42:32.1 \n", "2018-08-10 11:43:00 08/10/2018 11:42:38.1 \n", "2018-08-10 11:43:00 08/10/2018 11:43:10.1 \n", "2018-08-10 11:43:00 08/10/2018 11:43:20.1 \n", "2018-08-10 11:44:00 08/10/2018 11:43:38.1 \n", "2018-08-10 11:45:00 08/10/2018 11:44:35.1 \n", "2018-08-10 11:45:00 08/10/2018 11:45:05.1 \n", "2018-08-10 11:45:00 08/10/2018 11:45:13.1 \n", "2018-08-10 11:46:00 08/10/2018 11:45:33.1 \n", "2018-08-10 11:47:00 08/10/2018 11:46:59.1 \n", "2018-08-10 11:49:00 08/10/2018 11:48:55.1 \n", "2018-08-10 11:49:00 08/10/2018 11:48:56.1 \n", "2018-08-10 11:49:00 08/10/2018 11:49:00.1 \n", "2018-08-10 11:49:00 08/10/2018 11:49:07.1 \n", "2018-08-10 11:49:00 08/10/2018 11:49:08.1 \n", "2018-08-10 11:49:00 08/10/2018 11:49:16.1 \n", "2018-08-10 11:50:00 08/10/2018 11:50:13.1 \n", "2018-08-10 11:50:00 08/10/2018 11:50:22.1 \n", "2018-08-10 11:51:00 08/10/2018 11:50:35.1 \n", "... ... ... \n", "2018-08-10 13:44:00 08/10/2018 13:44:08.1 \n", "2018-08-10 13:45:00 08/10/2018 13:44:41.1 \n", "2018-08-10 13:45:00 08/10/2018 13:44:59.1 \n", "2018-08-10 13:45:00 08/10/2018 13:45:16.1 \n", "2018-08-10 13:45:00 08/10/2018 13:45:25.1 \n", "2018-08-10 13:46:00 08/10/2018 13:45:31.1 \n", "2018-08-10 13:46:00 08/10/2018 13:46:09.1 \n", "2018-08-10 13:46:00 08/10/2018 13:46:13.1 \n", "2018-08-10 13:48:00 08/10/2018 13:47:59.1 \n", "2018-08-10 13:49:00 08/10/2018 13:48:34.1 \n", "2018-08-10 13:50:00 08/10/2018 13:49:32.1 \n", "2018-08-10 13:50:00 08/10/2018 13:49:40.1 \n", "2018-08-10 13:50:00 08/10/2018 13:50:07.1 \n", "2018-08-10 13:50:00 08/10/2018 13:50:17.1 \n", "2018-08-10 13:51:00 08/10/2018 13:50:30.1 \n", "2018-08-10 13:51:00 08/10/2018 13:50:49.1 \n", "2018-08-10 13:52:00 08/10/2018 13:51:44.1 \n", "2018-08-10 13:52:00 08/10/2018 13:51:50.1 \n", "2018-08-10 13:53:00 08/10/2018 13:52:54.1 \n", "2018-08-10 13:54:00 08/10/2018 13:53:52.1 \n", "2018-08-10 13:54:00 08/10/2018 13:54:28.1 \n", "2018-08-10 13:55:00 08/10/2018 13:54:33.1 \n", "2018-08-10 13:55:00 08/10/2018 13:55:19.1 \n", "2018-08-10 13:56:00 08/10/2018 13:56:27.1 \n", "2018-08-10 13:57:00 08/10/2018 13:56:36.1 \n", "2018-08-10 13:58:00 08/10/2018 13:58:12.1 \n", "2018-08-10 13:59:00 08/10/2018 13:58:43.1 \n", "2018-08-10 13:59:00 08/10/2018 13:58:54.1 \n", "2018-08-10 13:59:00 08/10/2018 13:59:02.1 \n", "2018-08-10 14:00:00 08/10/2018 13:59:40.1 \n", "\n", " Call_Disconnect_Reason prediction anomaly \n", "Date \n", "2018-08-10 11:37:00 17 0.0 1 \n", "2018-08-10 11:38:00 16 1.0 1 \n", "2018-08-10 11:38:00 17 0.0 1 \n", "2018-08-10 11:38:00 17 0.0 1 \n", "2018-08-10 11:39:00 16 1.0 1 \n", "2018-08-10 11:40:00 16 1.0 1 \n", "2018-08-10 11:40:00 17 0.0 1 \n", "2018-08-10 11:40:00 16 0.0 0 \n", "2018-08-10 11:41:00 17 1.0 1 \n", "2018-08-10 11:42:00 16 1.0 1 \n", "2018-08-10 11:42:00 17 1.0 1 \n", "2018-08-10 11:43:00 16 0.0 0 \n", "2018-08-10 11:43:00 16 0.0 0 \n", "2018-08-10 11:43:00 16 0.0 0 \n", "2018-08-10 11:43:00 17 0.0 1 \n", "2018-08-10 11:44:00 16 0.0 0 \n", "2018-08-10 11:45:00 16 0.0 0 \n", "2018-08-10 11:45:00 17 1.0 1 \n", "2018-08-10 11:45:00 16 0.0 0 \n", "2018-08-10 11:46:00 16 0.0 0 \n", "2018-08-10 11:47:00 17 1.0 1 \n", "2018-08-10 11:49:00 16 1.0 1 \n", "2018-08-10 11:49:00 17 0.0 1 \n", "2018-08-10 11:49:00 17 0.0 1 \n", "2018-08-10 11:49:00 17 0.0 1 \n", "2018-08-10 11:49:00 16 0.0 0 \n", "2018-08-10 11:49:00 16 0.0 0 \n", "2018-08-10 11:50:00 17 0.0 1 \n", "2018-08-10 11:50:00 16 1.0 1 \n", "2018-08-10 11:51:00 17 0.0 1 \n", "... ... ... ... \n", "2018-08-10 13:44:00 17 0.0 1 \n", "2018-08-10 13:45:00 16 0.0 0 \n", "2018-08-10 13:45:00 16 0.0 0 \n", "2018-08-10 13:45:00 16 1.0 1 \n", "2018-08-10 13:45:00 16 0.0 0 \n", "2018-08-10 13:46:00 17 0.0 1 \n", "2018-08-10 13:46:00 17 0.0 1 \n", "2018-08-10 13:46:00 17 0.0 1 \n", "2018-08-10 13:48:00 17 0.0 1 \n", "2018-08-10 13:49:00 16 0.0 0 \n", "2018-08-10 13:50:00 17 0.0 1 \n", "2018-08-10 13:50:00 17 0.0 1 \n", "2018-08-10 13:50:00 17 1.0 1 \n", "2018-08-10 13:50:00 16 1.0 1 \n", "2018-08-10 13:51:00 17 0.0 1 \n", "2018-08-10 13:51:00 17 1.0 1 \n", "2018-08-10 13:52:00 17 0.0 1 \n", "2018-08-10 13:52:00 17 0.0 1 \n", "2018-08-10 13:53:00 16 0.0 0 \n", "2018-08-10 13:54:00 17 0.0 1 \n", "2018-08-10 13:54:00 16 0.0 0 \n", "2018-08-10 13:55:00 16 0.0 0 \n", "2018-08-10 13:55:00 16 1.0 1 \n", "2018-08-10 13:56:00 17 0.0 1 \n", "2018-08-10 13:57:00 17 0.0 1 \n", "2018-08-10 13:58:00 16 1.0 1 \n", "2018-08-10 13:59:00 17 1.0 1 \n", "2018-08-10 13:59:00 17 1.0 1 \n", "2018-08-10 13:59:00 16 1.0 1 \n", "2018-08-10 14:00:00 16 0.0 0 \n", "\n", "[336 rows x 5 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prediction_length = 20\n", "context_length = 20\n", "ts[:-prediction_length]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "time_series_training = []\n", "for ts in time_series:\n", " time_series_training.append(ts[:-prediction_length])\n", "time_series[0].plot(label='test')\n", "time_series_training[0].plot(label='train', ls=':')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "DeepAR supervised learning algorithm for forecasting scalar time series of Telecom Call Details Record." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "import time\n", "import numpy as np\n", "np.random.seed(1)\n", "import pandas as pd\n", "import json\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will use the sagemaker client library for easy interface with sagemaker and s3fs for uploading the training data to S3. (Use `pip` to install missing libraries)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Solving environment: done\n", "\n", "\n", "==> WARNING: A newer version of conda exists. <==\n", " current version: 4.4.10\n", " latest version: 4.5.11\n", "\n", "Please update conda by running\n", "\n", " $ conda update -n base conda\n", "\n", "\n", "\n", "# All requested packages already installed.\n", "\n" ] } ], "source": [ "!conda install -y s3fs" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "import boto3\n", "import s3fs\n", "import sagemaker\n", "from sagemaker import get_execution_role" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- The S3 bucket and prefix that you want to use for training and model data. This should be within the same region as the Notebook Instance, training, and hosting.\n", "- The IAM role arn used to give training and hosting access to your data. We use the `get_execution_role` function to obtain the role arn which was specified when creating the notebook." ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "isConfigCell": true }, "outputs": [], "source": [ "bucket = '<%bucket_name%>' #<-- your bucket_name\n", "version = '%%VERSION%%'\n", "prefix = 'sagemaker/Telecom-RandomForest/DeepAR'\n", "prefix = 'machine-learning-for-all/{}/data/cdr-stop'.format(version) \n", "\n", "sagemaker_session = sagemaker.Session()\n", "role = get_execution_role()\n", "\n", "s3_data_path = \"{}/{}\".format(bucket, prefix)\n", "s3_output_path = \"{}/{}/output\".format(bucket, prefix)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next, we configure the container image to be used for the region that we are running in." ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "from sagemaker.amazon.amazon_estimator import get_image_uri\n", "image_name = get_image_uri(boto3.Session().region_name, 'forecasting-deepar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generating and uploading data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We want to train a model that can predict the next 20 points of syntheticly generated time series.\n", "The time series that we use have minutes granularity." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We also Configure the `context_length`, which determines how much context of the time series the model should take into account when making the prediction, i.e. how many previous points to look at." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "freq = 'min'\n", "# we predict for 20 Minutes\n", "prediction_length = 20\n", "\n", "# we also use 20 Minutes as context length, this is the number of state updates accomplished before making predictions\n", "context_length = 20" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "start_dataset = pd.Timestamp(\"2018-07-15 00:00:00\", freq=freq)\n", "end_training = pd.Timestamp(\"2018-08-09 00:00:00\", freq=freq)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The following utility functions convert pandas.Series objects into the appropriate JSON strings that DeepAR can consume. We will use these to write the data to S3." ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "def series_to_obj(ts, cat=None):\n", " obj = {\"start\": str(ts.index[0]), \"target\": list(ts)}\n", " if cat is not None:\n", " obj[\"cat\"] = cat\n", " return obj\n", "\n", "def series_to_jsonline(ts, cat=None):\n", " return json.dumps(series_to_obj(ts, cat))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "encoding = \"utf-8\"\n", "s3filesystem = s3fs.S3FileSystem()\n", "\n", "with s3filesystem.open(s3_data_path + \"/train/train.json\", 'wb') as fp:\n", " for ts in time_series_training:\n", " fp.write(series_to_jsonline(ts).encode(encoding))\n", " fp.write('\\n'.encode(encoding))\n", "\n", "with s3filesystem.open(s3_data_path + \"/test/test.json\", 'wb') as fp:\n", " for ts in time_series:\n", " fp.write(series_to_jsonline(ts).encode(encoding))\n", " fp.write('\\n'.encode(encoding))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train a model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can now define the estimator that will launch the training job." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "estimator = sagemaker.estimator.Estimator(\n", " sagemaker_session=sagemaker_session,\n", " image_name=image_name,\n", " role=role,\n", " train_instance_count=1,\n", " train_instance_type='ml.c4.xlarge',\n", " base_job_name='Ml-Telcom-DemoForecast-deepar',\n", " output_path=\"s3://\" + s3_output_path\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Hyperparameters" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "hyperparameters = {\n", " \"time_freq\": freq,\n", " \"context_length\": str(context_length),\n", " \"prediction_length\": str(prediction_length),\n", " \"num_cells\": \"40\",\n", " \"num_layers\": \"3\",\n", " \"likelihood\": \"gaussian\",\n", " \"epochs\": \"20\",\n", " \"mini_batch_size\": \"32\",\n", " \"learning_rate\": \"0.001\",\n", " \"dropout_rate\": \"0.05\",\n", " \"early_stopping_patience\": \"10\"\n", "}" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "estimator.set_hyperparameters(**hyperparameters)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "SageMaker will start an EC2 instance, download the data from S3, start training the model and save the trained model.\n", "\n", "DeepAR will also calculate accuracy metrics for the trained model on the test data set. This is done by predicting the last `perdiction_length` points of each time series in the test set and comparing this to the actual value of the time series.\n" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:sagemaker:Creating training-job with name: Ml-Telcom-DemoForecast-deepar-2018-09-27-19-04-18-999\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ ".....................\n", "\u001b[31mArguments: train\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/default-input.json: {u'num_dynamic_feat': u'auto', u'dropout_rate': u'0.10', u'mini_batch_size': u'128', u'test_quantiles': u'[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]', u'_tuning_objective_metric': u'', u'_num_gpus': u'auto', u'num_eval_samples': u'100', u'learning_rate': u'0.001', u'num_cells': u'40', u'num_layers': u'2', u'embedding_dimension': u'10', u'_kvstore': u'auto', u'_num_kv_servers': u'auto', u'cardinality': u'auto', u'likelihood': u'student-t', u'early_stopping_patience': u''}\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Reading provided configuration from /opt/ml/input/config/hyperparameters.json: {u'dropout_rate': u'0.05', u'learning_rate': u'0.001', u'num_cells': u'40', u'prediction_length': u'20', u'epochs': u'20', u'time_freq': u'min', u'context_length': u'20', u'num_layers': u'3', u'mini_batch_size': u'32', u'likelihood': u'gaussian', u'early_stopping_patience': u'10'}\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Final configuration: {u'dropout_rate': u'0.05', u'test_quantiles': u'[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]', u'_tuning_objective_metric': u'', u'num_eval_samples': u'100', u'learning_rate': u'0.001', u'num_layers': u'3', u'epochs': u'20', u'embedding_dimension': u'10', u'num_cells': u'40', u'_num_kv_servers': u'auto', u'mini_batch_size': u'32', u'likelihood': u'gaussian', u'num_dynamic_feat': u'auto', u'cardinality': u'auto', u'_num_gpus': u'auto', u'prediction_length': u'20', u'time_freq': u'min', u'context_length': u'20', u'_kvstore': u'auto', u'early_stopping_patience': u'10'}\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Detected entry point for worker worker\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Using early stopping with patience 10\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] [cardinality=auto] `cat` field was NOT found in the file `/opt/ml/input/data/train/train.json` and will NOT be used for training.\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] [num_dynamic_feat=auto] `dynamic_feat` field was NOT found in the file `/opt/ml/input/data/train/train.json` and will NOT be used for training.\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Training set statistics:\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Integer time series\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] number of time series: 1\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] number of observations: 336\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] mean target length: 336\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] min/mean/max target: 0.0/0.752976190476/1.0\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] mean abs(target): 0.752976190476\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] contains missing values: no\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Small number of time series. Doing 10 number of passes over dataset per epoch.\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Test set statistics:\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Integer time series\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] number of time series: 1\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] number of observations: 356\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] mean target length: 356\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] min/mean/max target: 0.0/0.755617977528/1.0\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] mean abs(target): 0.755617977528\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] contains missing values: no\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] nvidia-smi took: 0.0252199172974 secs to identify 0 gpus\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Number of GPUs being used: 0\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Create Store: local\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"get_graph.time\": {\"count\": 1, \"max\": 99.35808181762695, \"sum\": 99.35808181762695, \"min\": 99.35808181762695}}, \"EndTime\": 1538075259.603323, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075259.501878}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Number of GPUs being used: 0\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"initialize.time\": {\"count\": 1, \"max\": 275.1431465148926, \"sum\": 275.1431465148926, \"min\": 275.1431465148926}}, \"EndTime\": 1538075259.777122, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075259.60341}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:39 INFO 139992056387392] Epoch[0] Batch[0] avg_epoch_loss=1.668614\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] Epoch[0] Batch[5] avg_epoch_loss=1.000207\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] Epoch[0] Batch [5]#011Speed: 598.22 samples/sec#011loss=1.000207\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] processed a total of 308 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"epochs\": {\"count\": 1, \"max\": 20, \"sum\": 20.0, \"min\": 20}, \"update.time\": {\"count\": 1, \"max\": 685.8439445495605, \"sum\": 685.8439445495605, \"min\": 685.8439445495605}}, \"EndTime\": 1538075260.463144, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075259.777206}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=449.001835844 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] #progress_metric: host=algo-1, completed 5 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_fc9c47d7-a4b2-440c-9cc0-f21127499520-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 18.35489273071289, \"sum\": 18.35489273071289, \"min\": 18.35489273071289}}, \"EndTime\": 1538075260.482053, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075260.46323}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] Epoch[1] Batch[0] avg_epoch_loss=0.718298\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] Epoch[1] Batch[5] avg_epoch_loss=0.681346\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] Epoch[1] Batch [5]#011Speed: 685.95 samples/sec#011loss=0.681346\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:40 INFO 139992056387392] processed a total of 315 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 517.9588794708252, \"sum\": 517.9588794708252, \"min\": 517.9588794708252}}, \"EndTime\": 1538075261.000167, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075260.482125}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=607.989588937 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] #progress_metric: host=algo-1, completed 10 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_caa09677-5bcb-445c-9314-d27563bb62c3-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 17.38905906677246, \"sum\": 17.38905906677246, \"min\": 17.38905906677246}}, \"EndTime\": 1538075261.018225, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075261.000244}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Epoch[2] Batch[0] avg_epoch_loss=0.632169\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Epoch[2] Batch[5] avg_epoch_loss=0.638416\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Epoch[2] Batch [5]#011Speed: 706.46 samples/sec#011loss=0.638416\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Epoch[2] Batch[10] avg_epoch_loss=0.642646\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Epoch[2] Batch [10]#011Speed: 698.65 samples/sec#011loss=0.647721\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] processed a total of 331 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 540.679931640625, \"sum\": 540.679931640625, \"min\": 540.679931640625}}, \"EndTime\": 1538075261.559035, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075261.018301}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=612.065280397 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] #progress_metric: host=algo-1, completed 15 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_850c6060-f374-4591-8f03-0d8fd132f66c-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 19.886016845703125, \"sum\": 19.886016845703125, \"min\": 19.886016845703125}}, \"EndTime\": 1538075261.579381, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075261.559114}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Epoch[3] Batch[0] avg_epoch_loss=0.634267\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Epoch[3] Batch[5] avg_epoch_loss=0.613932\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:41 INFO 139992056387392] Epoch[3] Batch [5]#011Speed: 675.70 samples/sec#011loss=0.613932\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] processed a total of 315 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 517.29416847229, \"sum\": 517.29416847229, \"min\": 517.29416847229}}, \"EndTime\": 1538075262.096798, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075261.579449}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=608.806252817 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] #progress_metric: host=algo-1, completed 20 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_dcc74a08-84f0-4e77-a55a-c332767fb320-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 19.624948501586914, \"sum\": 19.624948501586914, \"min\": 19.624948501586914}}, \"EndTime\": 1538075262.11683, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075262.096875}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] Epoch[4] Batch[0] avg_epoch_loss=0.613078\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] Epoch[4] Batch[5] avg_epoch_loss=0.596061\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] Epoch[4] Batch [5]#011Speed: 657.28 samples/sec#011loss=0.596061\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] processed a total of 304 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 522.3469734191895, \"sum\": 522.3469734191895, \"min\": 522.3469734191895}}, \"EndTime\": 1538075262.639297, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075262.116898}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=581.868320453 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] #progress_metric: host=algo-1, completed 25 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_f2bfa6cf-96da-490f-adb7-436f73fb51b3-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 25.178194046020508, \"sum\": 25.178194046020508, \"min\": 25.178194046020508}}, \"EndTime\": 1538075262.664913, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075262.63937}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:42 INFO 139992056387392] Epoch[5] Batch[0] avg_epoch_loss=0.582463\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Epoch[5] Batch[5] avg_epoch_loss=0.577364\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Epoch[5] Batch [5]#011Speed: 646.17 samples/sec#011loss=0.577364\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Epoch[5] Batch[10] avg_epoch_loss=0.576717\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Epoch[5] Batch [10]#011Speed: 674.95 samples/sec#011loss=0.575940\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] processed a total of 343 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 589.5130634307861, \"sum\": 589.5130634307861, \"min\": 589.5130634307861}}, \"EndTime\": 1538075263.254557, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075262.664987}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=581.729553385 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] #progress_metric: host=algo-1, completed 30 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_6febd383-9f70-4d45-b70b-b5ac826b0804-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 16.642093658447266, \"sum\": 16.642093658447266, \"min\": 16.642093658447266}}, \"EndTime\": 1538075263.271619, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075263.254627}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Epoch[6] Batch[0] avg_epoch_loss=0.543216\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Epoch[6] Batch[5] avg_epoch_loss=0.558766\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Epoch[6] Batch [5]#011Speed: 686.83 samples/sec#011loss=0.558766\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] processed a total of 299 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 514.3611431121826, \"sum\": 514.3611431121826, \"min\": 514.3611431121826}}, \"EndTime\": 1538075263.786106, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075263.271693}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=581.184020435 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] #progress_metric: host=algo-1, completed 35 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_42559fb0-06ed-4370-916e-f6ee8f8bd256-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 22.788047790527344, \"sum\": 22.788047790527344, \"min\": 22.788047790527344}}, \"EndTime\": 1538075263.809307, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075263.786179}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:43 INFO 139992056387392] Epoch[7] Batch[0] avg_epoch_loss=0.560542\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] Epoch[7] Batch[5] avg_epoch_loss=0.568886\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] Epoch[7] Batch [5]#011Speed: 695.29 samples/sec#011loss=0.568886\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] Epoch[7] Batch[10] avg_epoch_loss=0.564763\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] Epoch[7] Batch [10]#011Speed: 706.21 samples/sec#011loss=0.559817\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] processed a total of 331 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 553.7979602813721, \"sum\": 553.7979602813721, \"min\": 553.7979602813721}}, \"EndTime\": 1538075264.363235, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075263.809382}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=597.574099398 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] #progress_metric: host=algo-1, completed 40 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] loss did not improve for 1 epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] Epoch[8] Batch[0] avg_epoch_loss=0.553834\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] Epoch[8] Batch[5] avg_epoch_loss=0.548489\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] Epoch[8] Batch [5]#011Speed: 650.23 samples/sec#011loss=0.548489\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] processed a total of 306 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 549.6490001678467, \"sum\": 549.6490001678467, \"min\": 549.6490001678467}}, \"EndTime\": 1538075264.913256, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075264.363309}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=556.60351806 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] #progress_metric: host=algo-1, completed 45 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:44 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_6c5ba5e5-4907-4f06-9714-9789161d36e6-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 16.722917556762695, \"sum\": 16.722917556762695, \"min\": 16.722917556762695}}, \"EndTime\": 1538075264.930667, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075264.91333}\n", "\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Epoch[9] Batch[0] avg_epoch_loss=0.570835\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Epoch[9] Batch[5] avg_epoch_loss=0.539807\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Epoch[9] Batch [5]#011Speed: 691.67 samples/sec#011loss=0.539807\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Epoch[9] Batch[10] avg_epoch_loss=0.531483\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Epoch[9] Batch [10]#011Speed: 699.48 samples/sec#011loss=0.521493\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] processed a total of 343 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 557.8279495239258, \"sum\": 557.8279495239258, \"min\": 557.8279495239258}}, \"EndTime\": 1538075265.488624, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075264.930735}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=614.754606879 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] #progress_metric: host=algo-1, completed 50 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_32da4cfd-4674-4f8c-b33f-a4d3c962f225-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 17.2271728515625, \"sum\": 17.2271728515625, \"min\": 17.2271728515625}}, \"EndTime\": 1538075265.506317, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075265.4887}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Epoch[10] Batch[0] avg_epoch_loss=0.535287\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Epoch[10] Batch[5] avg_epoch_loss=0.526376\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:45 INFO 139992056387392] Epoch[10] Batch [5]#011Speed: 689.97 samples/sec#011loss=0.526376\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] processed a total of 299 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 496.1738586425781, \"sum\": 496.1738586425781, \"min\": 496.1738586425781}}, \"EndTime\": 1538075266.002614, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075265.506386}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=602.477603881 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] #progress_metric: host=algo-1, completed 55 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_28758133-0f00-4f01-862a-62dcebbfb8d7-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 23.020029067993164, \"sum\": 23.020029067993164, \"min\": 23.020029067993164}}, \"EndTime\": 1538075266.026067, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075266.00269}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Epoch[11] Batch[0] avg_epoch_loss=0.513396\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Epoch[11] Batch[5] avg_epoch_loss=0.521285\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Epoch[11] Batch [5]#011Speed: 682.70 samples/sec#011loss=0.521285\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Epoch[11] Batch[10] avg_epoch_loss=0.509232\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Epoch[11] Batch [10]#011Speed: 630.55 samples/sec#011loss=0.494767\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] processed a total of 336 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 586.3878726959229, \"sum\": 586.3878726959229, \"min\": 586.3878726959229}}, \"EndTime\": 1538075266.61258, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075266.026138}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=572.849808059 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] #progress_metric: host=algo-1, completed 60 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_3533afb6-9c9f-4d46-832c-66ca8fffa49f-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 24.536848068237305, \"sum\": 24.536848068237305, \"min\": 24.536848068237305}}, \"EndTime\": 1538075266.637568, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075266.612697}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Epoch[12] Batch[0] avg_epoch_loss=0.516536\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Epoch[12] Batch[5] avg_epoch_loss=0.506413\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:46 INFO 139992056387392] Epoch[12] Batch [5]#011Speed: 714.65 samples/sec#011loss=0.506413\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] Epoch[12] Batch[10] avg_epoch_loss=0.490687\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] Epoch[12] Batch [10]#011Speed: 622.12 samples/sec#011loss=0.471815\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] processed a total of 325 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 580.3749561309814, \"sum\": 580.3749561309814, \"min\": 580.3749561309814}}, \"EndTime\": 1538075267.21806, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075266.637633}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=559.880462346 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] #progress_metric: host=algo-1, completed 65 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_1239933f-c4ba-44e1-87c2-6ad2a865a72c-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 17.4560546875, \"sum\": 17.4560546875, \"min\": 17.4560546875}}, \"EndTime\": 1538075267.235959, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075267.218132}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] Epoch[13] Batch[0] avg_epoch_loss=0.501668\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] Epoch[13] Batch[5] avg_epoch_loss=0.488324\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] Epoch[13] Batch [5]#011Speed: 718.44 samples/sec#011loss=0.488324\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] processed a total of 318 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 498.7211227416992, \"sum\": 498.7211227416992, \"min\": 498.7211227416992}}, \"EndTime\": 1538075267.7348, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075267.236025}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=637.496808187 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] #progress_metric: host=algo-1, completed 70 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_c4cd172a-8f3f-4e95-a0d8-b0c2c32675ca-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 25.576114654541016, \"sum\": 25.576114654541016, \"min\": 25.576114654541016}}, \"EndTime\": 1538075267.760798, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075267.734871}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:47 INFO 139992056387392] Epoch[14] Batch[0] avg_epoch_loss=0.490021\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[14] Batch[5] avg_epoch_loss=0.470555\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[14] Batch [5]#011Speed: 715.76 samples/sec#011loss=0.470555\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[14] Batch[10] avg_epoch_loss=0.470830\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[14] Batch [10]#011Speed: 707.86 samples/sec#011loss=0.471160\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] processed a total of 331 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 553.5869598388672, \"sum\": 553.5869598388672, \"min\": 553.5869598388672}}, \"EndTime\": 1538075268.31451, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075267.760867}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=597.733872377 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] #progress_metric: host=algo-1, completed 75 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_8bd0b083-dd55-49cc-b79e-7cbb099a688e-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 17.199039459228516, \"sum\": 17.199039459228516, \"min\": 17.199039459228516}}, \"EndTime\": 1538075268.332312, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075268.314643}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[15] Batch[0] avg_epoch_loss=0.423810\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[15] Batch[5] avg_epoch_loss=0.460837\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[15] Batch [5]#011Speed: 682.42 samples/sec#011loss=0.460837\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[15] Batch[10] avg_epoch_loss=0.456859\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Epoch[15] Batch [10]#011Speed: 616.69 samples/sec#011loss=0.452087\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] processed a total of 345 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 578.6659717559814, \"sum\": 578.6659717559814, \"min\": 578.6659717559814}}, \"EndTime\": 1538075268.911105, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075268.332382}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=596.074357969 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] #progress_metric: host=algo-1, completed 80 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:48 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_0e946b2d-7fc5-4f85-82d2-8de273be0043-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 17.600059509277344, \"sum\": 17.600059509277344, \"min\": 17.600059509277344}}, \"EndTime\": 1538075268.929193, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075268.911187}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] Epoch[16] Batch[0] avg_epoch_loss=0.452340\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] Epoch[16] Batch[5] avg_epoch_loss=0.428428\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] Epoch[16] Batch [5]#011Speed: 709.95 samples/sec#011loss=0.428428\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] processed a total of 318 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 507.0209503173828, \"sum\": 507.0209503173828, \"min\": 507.0209503173828}}, \"EndTime\": 1538075269.436315, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075268.92925}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=627.075363928 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] #progress_metric: host=algo-1, completed 85 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_504570da-358b-4cff-930b-6dc2b9891b20-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 16.973018646240234, \"sum\": 16.973018646240234, \"min\": 16.973018646240234}}, \"EndTime\": 1538075269.453712, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075269.436378}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] Epoch[17] Batch[0] avg_epoch_loss=0.390457\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] Epoch[17] Batch[5] avg_epoch_loss=0.417007\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] Epoch[17] Batch [5]#011Speed: 695.72 samples/sec#011loss=0.417007\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] processed a total of 304 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 512.854814529419, \"sum\": 512.854814529419, \"min\": 512.854814529419}}, \"EndTime\": 1538075269.966671, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075269.453771}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=592.638851783 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] #progress_metric: host=algo-1, completed 90 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:49 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_33f07b23-7078-4d1d-8c49-6702c376fb90-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 16.416072845458984, \"sum\": 16.416072845458984, \"min\": 16.416072845458984}}, \"EndTime\": 1538075269.983498, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075269.966743}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] Epoch[18] Batch[0] avg_epoch_loss=0.421876\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] Epoch[18] Batch[5] avg_epoch_loss=0.417273\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] Epoch[18] Batch [5]#011Speed: 717.33 samples/sec#011loss=0.417273\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] processed a total of 315 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 510.85495948791504, \"sum\": 510.85495948791504, \"min\": 510.85495948791504}}, \"EndTime\": 1538075270.494473, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075269.983565}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=616.470957122 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] #progress_metric: host=algo-1, completed 95 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_7d565c4c-05a4-4086-8285-9c74be72a0ac-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 17.915010452270508, \"sum\": 17.915010452270508, \"min\": 17.915010452270508}}, \"EndTime\": 1538075270.51285, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075270.494554}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] Epoch[19] Batch[0] avg_epoch_loss=0.332859\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] Epoch[19] Batch[5] avg_epoch_loss=0.344839\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:50 INFO 139992056387392] Epoch[19] Batch [5]#011Speed: 603.75 samples/sec#011loss=0.344839\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] processed a total of 310 examples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"update.time\": {\"count\": 1, \"max\": 550.1449108123779, \"sum\": 550.1449108123779, \"min\": 550.1449108123779}}, \"EndTime\": 1538075271.063111, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075270.512915}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] #throughput_metric: host=algo-1, train throughput=563.36997467 records/second\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] #progress_metric: host=algo-1, completed 100 % of epochs\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] best epoch loss so far\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/state_34c78c01-d822-458c-a25b-243b88513dcf-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.serialize.time\": {\"count\": 1, \"max\": 24.517059326171875, \"sum\": 24.517059326171875, \"min\": 24.517059326171875}}, \"EndTime\": 1538075271.088053, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075271.063192}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Loading parameters from best epoch (19)\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"state.deserialize.time\": {\"count\": 1, \"max\": 6.698131561279297, \"sum\": 6.698131561279297, \"min\": 6.698131561279297}}, \"EndTime\": 1538075271.094933, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075271.088121}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Final loss: 0.356049019098 (occurred at epoch 19)\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] #quality_metric: host=algo-1, train final_loss =0.356049019098\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Worker algo-1 finished training.\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 WARNING 139992056387392] wait_for_all_workers will not sync workers since the kv store is not running distributed\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] All workers finished. Serializing model for prediction.\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"get_graph.time\": {\"count\": 1, \"max\": 127.70795822143555, \"sum\": 127.70795822143555, \"min\": 127.70795822143555}}, \"EndTime\": 1538075271.223266, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075271.094984}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Number of GPUs being used: 0\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"finalize.time\": {\"count\": 1, \"max\": 186.57302856445312, \"sum\": 186.57302856445312, \"min\": 186.57302856445312}}, \"EndTime\": 1538075271.28209, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075271.223339}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Serializing to /opt/ml/model/model_algo-1\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Saved checkpoint to \"/opt/ml/model/model_algo-1-0000.params\"\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"model.serialize.time\": {\"count\": 1, \"max\": 10.080099105834961, \"sum\": 10.080099105834961, \"min\": 10.080099105834961}}, \"EndTime\": 1538075271.29228, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075271.282159}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Successfully serialized the model for prediction.\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:51 INFO 139992056387392] Evaluating model accuracy on testset using 100 samples\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"model.bind.time\": {\"count\": 1, \"max\": 0.04410743713378906, \"sum\": 0.04410743713378906, \"min\": 0.04410743713378906}}, \"EndTime\": 1538075271.293088, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075271.292329}\n", "\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"model.score.time\": {\"count\": 1, \"max\": 836.759090423584, \"sum\": 836.759090423584, \"min\": 836.759090423584}}, \"EndTime\": 1538075272.129804, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075271.293147}\n", "\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, RMSE): 0.404396450293\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, mean_wQuantileLoss): 0.287107\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.1]): 0.299846\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.2]): 0.35851\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.3]): 0.381453\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.4]): 0.367567\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.5]): 0.334978\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.6]): 0.302277\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.7]): 0.243726\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.8]): 0.183429\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #test_score (algo-1, wQuantileLoss[0.9]): 0.112176\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #quality_metric: host=algo-1, test RMSE =0.404396450293\u001b[0m\n", "\u001b[31m[09/27/2018 19:07:52 INFO 139992056387392] #quality_metric: host=algo-1, test mean_wQuantileLoss =0.287106812\u001b[0m\n", "\u001b[31m#metrics {\"Metrics\": {\"totaltime\": {\"count\": 1, \"max\": 12833.847999572754, \"sum\": 12833.847999572754, \"min\": 12833.847999572754}, \"setuptime\": {\"count\": 1, \"max\": 10.254859924316406, \"sum\": 10.254859924316406, \"min\": 10.254859924316406}}, \"EndTime\": 1538075272.145939, \"Dimensions\": {\"Host\": \"algo-1\", \"Operation\": \"training\", \"Algorithm\": \"AWS/DeepAR\"}, \"StartTime\": 1538075272.129872}\n", "\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Billable seconds: 97\n" ] } ], "source": [ "data_channels = {\n", " \"train\": \"s3://{}/train/\".format(s3_data_path),\n", " \"test\": \"s3://{}/test/\".format(s3_data_path)\n", "}\n", "\n", "estimator.fit(inputs=data_channels)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create endpoint and predictor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The model is trained , we can use it to perform predictions by deploying it to an endpoint." ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:sagemaker:Creating model with name: Ml-Telcom-DemoForecast-deepar-2018-09-27-19-04-18-999\n", "INFO:sagemaker:Creating endpoint-config with name Ml-Telcom-DemoForecast-deepar-2018-09-27-19-04-18-999\n", "INFO:sagemaker:Creating endpoint with name Ml-Telcom-DemoForecast-deepar-2018-09-27-19-04-18-999\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "---------------------------------------------------------------------------!" ] } ], "source": [ "job_name = estimator.latest_training_job.name\n", "\n", "endpoint_name = sagemaker_session.endpoint_from_job(\n", " job_name=job_name,\n", " initial_instance_count=1,\n", " instance_type='ml.m4.xlarge',\n", " deployment_image=image_name,\n", " role=role\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To query the endpoint and perform predictions, we can define the following utility class: this allows making requests using `pandas.Series` objects rather than raw JSON strings." ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "class DeepARPredictor(sagemaker.predictor.RealTimePredictor):\n", "\n", " def set_prediction_parameters(self, freq, prediction_length):\n", " \"\"\"Set the time frequency and prediction length parameters. This method **must** be called\n", " before being able to use `predict`.\n", " \n", " Parameters:\n", " freq -- string indicating the time frequency\n", " prediction_length -- integer, number of predicted time points\n", " \n", " Return value: none.\n", " \"\"\"\n", " self.freq = freq\n", " self.prediction_length = prediction_length\n", " \n", " def predict(self, ts, cat=None, encoding=\"utf-8\", num_samples=100, quantiles=[\"0.1\", \"0.5\", \"0.9\"]):\n", " \"\"\"Requests the prediction of for the time series listed in `ts`, each with the (optional)\n", " corresponding category listed in `cat`.\n", " \n", " Parameters:\n", " ts -- list of `pandas.Series` objects, the time series to predict\n", " cat -- list of integers (default: None)\n", " encoding -- string, encoding to use for the request (default: \"utf-8\")\n", " num_samples -- integer, number of samples to compute at prediction time (default: 100)\n", " quantiles -- list of strings specifying the quantiles to compute (default: [\"0.1\", \"0.5\", \"0.9\"])\n", " \n", " Return value: list of `pandas.DataFrame` objects, each containing the predictions\n", " \"\"\"\n", " prediction_times = [x.index[-1]+1 for x in ts]\n", " req = self.__encode_request(ts, cat, encoding, num_samples, quantiles)\n", " res = super(DeepARPredictor, self).predict(req)\n", " return self.__decode_response(res, prediction_times, encoding)\n", " \n", " def __encode_request(self, ts, cat, encoding, num_samples, quantiles):\n", " instances = [series_to_obj(ts[k], cat[k] if cat else None) for k in range(len(ts))]\n", " configuration = {\"num_samples\": num_samples, \"output_types\": [\"quantiles\"], \"quantiles\": quantiles}\n", " http_request_data = {\"instances\": instances, \"configuration\": configuration}\n", " return json.dumps(http_request_data).encode(encoding)\n", " \n", " def __decode_response(self, response, prediction_times, encoding):\n", " response_data = json.loads(response.decode(encoding))\n", " list_of_df = []\n", " for k in range(len(prediction_times)):\n", " prediction_index = pd.DatetimeIndex(start=prediction_times[k], freq=self.freq, periods=self.prediction_length)\n", " list_of_df.append(pd.DataFrame(data=response_data['predictions'][k]['quantiles'], index=prediction_index))\n", " return list_of_df" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "predictor = DeepARPredictor(\n", " endpoint=endpoint_name,\n", " sagemaker_session=sagemaker_session,\n", " content_type=\"application/json\"\n", ")\n", "predictor.set_prediction_parameters(freq, prediction_length)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Make predictions and plot results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can use the previously created `predictor` object. For simplicity, we will predict only the first few time series used for training, and compare the results with the actual data we kept in the test set." ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "list_of_df = predictor.predict(time_series_training[:10])\n", "actual_data = time_series[:10]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "prediction_length = 10\n", "context_length = 10" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "for k in range(len(list_of_df)):\n", " plt.figure(figsize=(12,6))\n", " actual_data[k][-prediction_length-context_length:].plot(label='target')\n", " p10 = list_of_df[k]['0.1']\n", " p90 = list_of_df[k]['0.9']\n", " plt.fill_between(p10.index, p10, p90, color='y', alpha=0.5, label='80% confidence interval')\n", " list_of_df[k]['0.5'].plot(label='prediction median')\n", " plt.legend()\n", " plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Delete endpoint" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:sagemaker:Deleting endpoint with name: Ml-Telcom-DemoForecast-deepar-2018-09-27-19-04-18-999\n" ] } ], "source": [ "sagemaker_session.delete_endpoint(endpoint_name)" ] } ], "metadata": { "kernelspec": { "display_name": "conda_python3", "language": "python", "name": "conda_python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" }, "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." }, "nbformat": 4, "nbformat_minor": 2 }