{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# MDA demos" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make sure replace the following variables to the corresponding ones in your environment.\n", "- APIUrl" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", "import altair as alt\n", "import json\n", "import urllib3\n", "\n", "APIUrl = 'https://xxxxxxxx.execute-api.us-east-1.amazonaws.com/{}'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Real-time forecast for a specific meter" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Invoke API gateway to send forecast request via Lambda to Sagemaker endpoint\n", "# if using notebook, Sagemaker role needs to have API gateway invoke permission\n", "def get_forecast(meter_id, start, end):\n", " # Access API to get cluster endpoint name and temporary credentials\n", " http = urllib3.PoolManager()\n", " endpoint = \"forecast/{}?data_start={}&data_end={}\".format(meter_id, start, end)\n", " forecast_api_url = APIUrl.format(endpoint)\n", "\n", " response = http.request('GET', forecast_api_url)\n", " return response.data.decode()\n", "\n", "resp = get_forecast('MAC004734', \"2013-05-01\", \"2013-10-01\")\n", "\n", "# convert response to dataframe and visualize\n", "df = pd.read_json(resp)\n", "df.plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get forecast from pre-calculation result, can be one or many meters" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```python\n", "from pyathena import connect\n", "\n", "region = 'us-east-1'\n", "athena_output_bucket = \"athena_output_bucket_4711\"\n", "\n", "connection = connect(s3_staging_dir='s3://{}/'.format(athena_output_bucket), region_name=region)\n", "\n", "meter_range = ['MAC000002', 'MAC000010']\n", "query = '''select meter_id, datetime, consumption from \"meter-data\".forecast\n", " where meter_id between {} and {};'''.format(meter_range[0], meter_range[1])\n", "df = pd.read_sql(query, connection)\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get Anomaly for a specific meter \n", "This visualization example requires weather data although the API supports w/o weather data." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "def plot_anomalies_wo_weather(forecasted):\n", " interval = alt.Chart(forecasted).mark_area(interpolate=\"basis\", color = '#7FC97F').encode(\n", " x=alt.X('ds:T', title ='date'),\n", " y='yhat_upper',\n", " y2='yhat_lower',\n", " tooltip=['ds', 'consumption', 'yhat_lower', 'yhat_upper']\n", " ).interactive().properties(\n", " title='Anomaly Detection'\n", " )\n", "\n", " fact = alt.Chart(forecasted).mark_line(color = '#774009').encode(\n", " x='ds:T',\n", " y=alt.Y('consumption', title='consumption')\n", " ).interactive()\n", "\n", " #apparenttemperature = alt.Chart(forecasted).mark_line(color = '#40F9F9').encode(\n", " # x='ds:T',\n", " # y='apparenttemperature'\n", " #)\n", "\n", " anomalies = alt.Chart(forecasted[forecasted.anomaly!=0]).mark_circle(size=30, color = 'Red').encode(\n", " x='ds:T',\n", " y=alt.Y('consumption', title='consumption'),\n", " tooltip=['ds', 'consumption', 'yhat_lower', 'yhat_upper'],\n", " size = alt.Size( 'importance', legend=None)\n", " ).interactive()\n", "\n", " return alt.layer(interval, fact, anomalies)\\\n", " .properties(width=870, height=450)\\\n", " .configure_title(fontSize=20)\n", "\n", "def plot_anomalies(forecasted):\n", " interval = alt.Chart(forecasted).mark_area(interpolate=\"basis\", color = '#7FC97F').encode(\n", " x=alt.X('ds:T', title ='date'),\n", " y='yhat_upper',\n", " y2='yhat_lower',\n", " tooltip=['ds', 'consumption', 'yhat_lower', 'yhat_upper', 'temperature', 'apparenttemperature']\n", " ).interactive().properties(\n", " title='Anomaly Detection'\n", " )\n", "\n", " fact = alt.Chart(forecasted).mark_line(color = '#774009').encode(\n", " x='ds:T',\n", " y=alt.Y('consumption', title='consumption')\n", " ).interactive()\n", "\n", " apparenttemperature = alt.Chart(forecasted).mark_line(color = '#40F9F9').encode(\n", " x='ds:T',\n", " y='apparenttemperature'\n", " )\n", "\n", " anomalies = alt.Chart(forecasted[forecasted.anomaly!=0]).mark_circle(size=30, color = 'Red').encode(\n", " x='ds:T',\n", " y=alt.Y('consumption', title='consumption'),\n", " tooltip=['ds', 'consumption', 'yhat_lower', 'yhat_upper', 'temperature', 'apparenttemperature'],\n", " size = alt.Size( 'importance', legend=None)\n", " ).interactive()\n", "\n", " return alt.layer(interval, fact, apparenttemperature, anomalies)\\\n", " .properties(width=870, height=450)\\\n", " .configure_title(fontSize=20)\n", "\n", "def get_forecast(meter_id, start, end, outlier_only):\n", " # Access API to get cluster endpoint name and temporary credentials\n", " http = urllib3.PoolManager()\n", " endpoint = \"anomaly/{}?data_start={}&data_end={}&outlier_only={}\".format(meter_id, start, end, outlier_only)\n", " anomaly_api_url = APIUrl.format(endpoint)\n", "\n", "\n", " response = http.request('GET', anomaly_api_url)\n", "\n", " return response.data.decode()\n", "\n", "# Call rest API to get anomaly\n", "resp = get_forecast('MAC000005', \"2013-01-01\", \"2013-12-31\", 0)\n", "\n", "# convert response to dataframe and visualize\n", "df = pd.read_json(resp)\n", "plot_anomalies_wo_weather(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get outage" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"startDateTime\": \"2013-01-03 09:00:01\", \"endDateTime\": \"2013-01-03 10:59:59\"}\n" ] }, { "data": { "text/html": [ "
\n", " | meter_id | \n", "lat | \n", "long | \n", "
---|---|---|---|
0 | \n", "MAC004027 | \n", "40.697940 | \n", "-73.969868 | \n", "
3 | \n", "MAC001104 | \n", "40.720195 | \n", "-74.010301 | \n", "
6 | \n", "MAC002600 | \n", "40.685144 | \n", "-73.953809 | \n", "
9 | \n", "MAC002607 | \n", "40.736529 | \n", "-74.006180 | \n", "
12 | \n", "MAC001406 | \n", "40.671649 | \n", "-73.963115 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
1485 | \n", "MAC001036 | \n", "40.793770 | \n", "-73.971888 | \n", "
1488 | \n", "MAC001060 | \n", "40.684751 | \n", "-73.999173 | \n", "
1491 | \n", "MAC002352 | \n", "40.729515 | \n", "-73.990753 | \n", "
1494 | \n", "MAC002745 | \n", "40.769943 | \n", "-73.960607 | \n", "
1497 | \n", "MAC000744 | \n", "40.668627 | \n", "-73.987001 | \n", "
500 rows × 3 columns
\n", "