{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Notebook: Forecast Visualization\n",
    "\n",
    "This notebook gives an example visualization of a demand dataset and related timeseries data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the start and end dates of the forecast\n",
    "start_date = '2014-09-01'  # YYYY-MM-DD\n",
    "end_date = '2015-02-01'  # YYYY-MM-DD\n",
    "\n",
    "# provide the full CSV name uploaded to the /train folder in S3\n",
    "demand_dataset_name = 'DATASET_NAME.csv'\n",
    "related_dataset_name = 'DATASET_NAME.related.csv'\n",
    "\n",
    "# provide the exports to show, relative to the exports/ folder in S3 (these are created by Amazon Forecast)\n",
    "forecast_exports = [\n",
    "    {\n",
    "        'path': 'export_date/export_name.csv',\n",
    "        'name': 'Forecast 1 - Name'\n",
    "    },\n",
    "    {\n",
    "        'path': 'export_date/export_name.csv',\n",
    "        'name': 'Forecast 2 - Name'\n",
    "    }\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "forecast_bucket = os.getenv('FORECAST_BUCKET')\n",
    "\n",
    "\n",
    "def download_data(path, date_column, header=None, start=None, end=None):\n",
    "    \"\"\"Read a CSV from the forecast bucket and keep only the rows inside a date window.\n",
    "\n",
    "    Args:\n",
    "        path: Object key of the CSV inside the forecast bucket.\n",
    "        date_column: Label or position of the column that is parsed as dates and filtered on.\n",
    "        header: Passed through to pandas.read_csv; None means the file has no header row.\n",
    "        start: Inclusive lower bound of the window; defaults to the notebook's start_date.\n",
    "        end: Inclusive upper bound of the window; defaults to the notebook's end_date.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with the rows whose date_column value lies between start and end.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the FORECAST_BUCKET environment variable is not set.\n",
    "    \"\"\"\n",
    "    if not forecast_bucket:\n",
    "        # without this check the URL below would silently become s3://None/...\n",
    "        raise ValueError(\"The FORECAST_BUCKET environment variable must name the forecast S3 bucket\")\n",
    "\n",
    "    # fall back to the window configured at the top of the notebook\n",
    "    start = start_date if start is None else start\n",
    "    end = end_date if end is None else end\n",
    "\n",
    "    data = pd.read_csv(f\"s3://{forecast_bucket}/{path}\", header=header, parse_dates=[date_column])\n",
    "\n",
    "    # filter the data to the dates specified\n",
    "    flt = (data[date_column] >= start) & (data[date_column] <= end)\n",
    "    return data.loc[flt]\n",
    "\n",
    "\n",
    "def get_exports(exports_list, date_column):\n",
    "    \"\"\"Download every forecast export and store the result under its 'data' key.\n",
    "\n",
    "    Args:\n",
    "        exports_list: List of dicts; each 'path' is looked up under the exports/ prefix.\n",
    "            The dicts are updated in place.\n",
    "        date_column: Name of the date column in the export files (read with a header row).\n",
    "    \"\"\"\n",
    "    for export in exports_list:\n",
    "        export['data'] = download_data(f\"exports/{export.get('path')}\", date_column, header=0)\n",
    "\n",
    "\n",
    "demand = download_data(f\"train/{demand_dataset_name}\", date_column=1)\n",
    "relate = download_data(f\"train/{related_dataset_name}\", date_column=1)\n",
    "get_exports(forecast_exports, date_column='date')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
"source": [
    "import matplotlib.dates as mdates\n",
    "import matplotlib.pyplot as plt\n",
    "import re\n",
    "\n",
    "years = mdates.YearLocator()\n",
    "months = mdates.MonthLocator()\n",
    "years_fmt = mdates.DateFormatter(\"%Y\")\n",
    "\n",
    "# export columns named p<digits> are plotted as quantiles (raw string keeps \\d a regex escape)\n",
    "quantile_column = re.compile(r'^p\\d+$')\n",
    "\n",
    "# there will be a subplot for every export, and an extra for the related timeseries\n",
    "# squeeze=False keeps `axes` an array even when forecast_exports is empty\n",
    "fig, axes = plt.subplots(len(forecast_exports) + 1, sharex=True, squeeze=False)\n",
    "axes = axes.ravel()\n",
    "fig.set_size_inches(18.5, 10.5)\n",
    "\n",
    "# the subplots share their x axis (sharex=True); ticks are configured on the first one\n",
    "axes[0].xaxis.set_major_locator(years)\n",
    "axes[0].xaxis.set_major_formatter(years_fmt)\n",
    "axes[0].xaxis.set_minor_locator(months)\n",
    "axes[0].format_xdata = mdates.DateFormatter(\"%Y-%m-%d\")\n",
    "\n",
    "for idx, ax in enumerate(axes):\n",
    "    ax.grid(True)\n",
    "\n",
    "    # the last subplot has no export; it is filled in after the loop\n",
    "    if idx >= len(forecast_exports):\n",
    "        continue\n",
    "\n",
    "    title = forecast_exports[idx].get('name')\n",
    "    data = forecast_exports[idx].get('data')\n",
    "\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel(\"Date\")\n",
    "    ax.set_ylabel(\"Demand\")\n",
    "\n",
    "    # plot the demand\n",
    "    ax.plot(demand[1], demand[2], linestyle='solid', color='DodgerBlue', label=\"Demand\")\n",
    "\n",
    "    # plot a dashed line from the end of the data to the start of the forecast\n",
    "    # (skipped when either frame has no rows inside the selected date range)\n",
    "    if not demand.empty and not data.empty:\n",
    "        xs = [demand[1].iloc[-1], data.date.iloc[0]]\n",
    "        ys = [demand[2].iloc[-1], data.p50.iloc[0]]\n",
    "        ax.plot(xs, ys, linestyle='dashed', color='DodgerBlue')\n",
    "\n",
    "    # plot each forecast\n",
    "    ax.plot(data.date, data.p50, linestyle='dashed', color='DodgerBlue', label='P50')\n",
    "\n",
    "    # plot the quantiles, alternating the fill color between them\n",
    "    colors = ['LightSteelBlue', 'LightBlue']\n",
    "    quantiles = [col for col in data.columns if quantile_column.match(col) and col != 'p50']\n",
    "    for position, pnn in enumerate(quantiles):\n",
    "        color = colors[position % len(colors)]\n",
    "        ax.fill_between(data.date, data.p50, data[pnn], color=color, label=pnn)\n",
    "\n",
    "    ax.legend(loc=\"lower left\")\n",
    "\n",
    "\n",
    "# this will show a related timeseries on the same plot using the same x axis\n",
    "axes[-1].set_title(\"Item Price\")\n",
    "axes[-1].set_xlabel(\"Date\")\n",
    "axes[-1].set_ylabel(\"Price ($)\")\n",
    "axes[-1].plot(relate[1], relate[2])\n",
    "\n",
    "fig.autofmt_xdate()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}