{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# MDA demos" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make sure replace the following variables to the corresponding ones in your environment.\n", "- APIUrl" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", "import altair as alt\n", "import json\n", "import urllib3\n", "\n", "APIUrl = 'https://xxxxxxxx.execute-api.us-east-1.amazonaws.com/{}'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Real-time forecast for a specific meter" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Invoke API gateway to send forecast request via Lambda to Sagemaker endpoint\n", "# if using notebook, Sagemaker role needs to have API gateway invoke permission\n", "def get_forecast(meter_id, start, end):\n", " # Access API to get cluster endpoint name and temporary credentials\n", " http = urllib3.PoolManager()\n", " endpoint = \"forecast/{}?data_start={}&data_end={}\".format(meter_id, start, end)\n", " forecast_api_url = APIUrl.format(endpoint)\n", "\n", " response = http.request('GET', forecast_api_url)\n", " return response.data.decode()\n", "\n", "resp = get_forecast('MAC004734', \"2013-05-01\", \"2013-10-01\")\n", "\n", "# convert response to dataframe and visualize\n", "df = pd.read_json(resp)\n", "df.plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get forecast from pre-calculation result, can be one or many meters" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```python\n", "from pyathena import connect\n", "\n", "region = 'us-east-1'\n", "athena_output_bucket = \"athena_output_bucket_4711\"\n", "\n", "connection = connect(s3_staging_dir='s3://{}/'.format(athena_output_bucket), region_name=region)\n", "\n", "meter_range = ['MAC000002', 'MAC000010']\n", "query = '''select meter_id, datetime, consumption from \"meter-data\".forecast\n", " where meter_id between {} and {};'''.format(meter_range[0], meter_range[1])\n", "df = pd.read_sql(query, connection)\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get Anomaly for a specific meter \n", "This visualization example requires weather data although the API supports w/o weather data." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "def plot_anomalies_wo_weather(forecasted):\n", " interval = alt.Chart(forecasted).mark_area(interpolate=\"basis\", color = '#7FC97F').encode(\n", " x=alt.X('ds:T', title ='date'),\n", " y='yhat_upper',\n", " y2='yhat_lower',\n", " tooltip=['ds', 'consumption', 'yhat_lower', 'yhat_upper']\n", " ).interactive().properties(\n", " title='Anomaly Detection'\n", " )\n", "\n", " fact = alt.Chart(forecasted).mark_line(color = '#774009').encode(\n", " x='ds:T',\n", " y=alt.Y('consumption', title='consumption')\n", " ).interactive()\n", "\n", " #apparenttemperature = alt.Chart(forecasted).mark_line(color = '#40F9F9').encode(\n", " # x='ds:T',\n", " # y='apparenttemperature'\n", " #)\n", "\n", " anomalies = alt.Chart(forecasted[forecasted.anomaly!=0]).mark_circle(size=30, color = 'Red').encode(\n", " x='ds:T',\n", " y=alt.Y('consumption', title='consumption'),\n", " tooltip=['ds', 'consumption', 'yhat_lower', 'yhat_upper'],\n", " size = alt.Size( 'importance', legend=None)\n", " ).interactive()\n", "\n", " return alt.layer(interval, fact, anomalies)\\\n", " .properties(width=870, height=450)\\\n", " .configure_title(fontSize=20)\n", "\n", "def plot_anomalies(forecasted):\n", " interval = alt.Chart(forecasted).mark_area(interpolate=\"basis\", color = '#7FC97F').encode(\n", " x=alt.X('ds:T', title ='date'),\n", " y='yhat_upper',\n", " y2='yhat_lower',\n", " tooltip=['ds', 'consumption', 'yhat_lower', 'yhat_upper', 'temperature', 'apparenttemperature']\n", " ).interactive().properties(\n", " title='Anomaly Detection'\n", " )\n", "\n", " fact = alt.Chart(forecasted).mark_line(color = '#774009').encode(\n", " x='ds:T',\n", " y=alt.Y('consumption', title='consumption')\n", " ).interactive()\n", "\n", " apparenttemperature = alt.Chart(forecasted).mark_line(color = '#40F9F9').encode(\n", " x='ds:T',\n", " y='apparenttemperature'\n", " )\n", "\n", " anomalies = alt.Chart(forecasted[forecasted.anomaly!=0]).mark_circle(size=30, color = 'Red').encode(\n", " x='ds:T',\n", " y=alt.Y('consumption', title='consumption'),\n", " tooltip=['ds', 'consumption', 'yhat_lower', 'yhat_upper', 'temperature', 'apparenttemperature'],\n", " size = alt.Size( 'importance', legend=None)\n", " ).interactive()\n", "\n", " return alt.layer(interval, fact, apparenttemperature, anomalies)\\\n", " .properties(width=870, height=450)\\\n", " .configure_title(fontSize=20)\n", "\n", "def get_forecast(meter_id, start, end, outlier_only):\n", " # Access API to get cluster endpoint name and temporary credentials\n", " http = urllib3.PoolManager()\n", " endpoint = \"anomaly/{}?data_start={}&data_end={}&outlier_only={}\".format(meter_id, start, end, outlier_only)\n", " anomaly_api_url = APIUrl.format(endpoint)\n", "\n", "\n", " response = http.request('GET', anomaly_api_url)\n", "\n", " return response.data.decode()\n", "\n", "# Call rest API to get anomaly\n", "resp = get_forecast('MAC000005', \"2013-01-01\", \"2013-12-31\", 0)\n", "\n", "# convert response to dataframe and visualize\n", "df = pd.read_json(resp)\n", "plot_anomalies_wo_weather(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get outage" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"startDateTime\": \"2013-01-03 09:00:01\", \"endDateTime\": \"2013-01-03 10:59:59\"}\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meter_idlatlong
0MAC00402740.697940-73.969868
3MAC00110440.720195-74.010301
6MAC00260040.685144-73.953809
9MAC00260740.736529-74.006180
12MAC00140640.671649-73.963115
............
1485MAC00103640.793770-73.971888
1488MAC00106040.684751-73.999173
1491MAC00235240.729515-73.990753
1494MAC00274540.769943-73.960607
1497MAC00074440.668627-73.987001
\n", "

500 rows × 3 columns

\n", "
" ], "text/plain": [ " meter_id lat long\n", "0 MAC004027 40.697940 -73.969868\n", "3 MAC001104 40.720195 -74.010301\n", "6 MAC002600 40.685144 -73.953809\n", "9 MAC002607 40.736529 -74.006180\n", "12 MAC001406 40.671649 -73.963115\n", "... ... ... ...\n", "1485 MAC001036 40.793770 -73.971888\n", "1488 MAC001060 40.684751 -73.999173\n", "1491 MAC002352 40.729515 -73.990753\n", "1494 MAC002745 40.769943 -73.960607\n", "1497 MAC000744 40.668627 -73.987001\n", "\n", "[500 rows x 3 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_outage(start, end):\n", " # Access API to get cluster endpoint name and temporary credentials\n", " http = urllib3.PoolManager()\n", " endpoint = \"outage?start_date_time={}&end_date_time={}\".format(start, end)\n", " outageAPIUrl = APIUrl.format(endpoint)\n", "\n", " response = http.request('GET', outageAPIUrl)\n", "\n", " return response.data\n", "\n", "# Call rest API to get outages\n", "resp = get_outage(\"2013-01-03 09:00:01\", \"2013-01-03 10:59:59\")\n", "data = json.loads(resp)\n", "\n", "if data['Items']:\n", " df = pd.DataFrame(data['Items'])\n", " df_result = df[['meter_id', 'error_value', 'lat', 'long']].drop_duplicates()\n", " df_result\n", "else:\n", " print(\"No errors found during the given time window.\")" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from vega_datasets import data\n", "\n", "counties = alt.topo_feature(data.us_10m.url, 'counties')\n", "\n", "# New York state background\n", "# County id code starts with state id. 36 is NY state\n", "map_newyork =(\n", " alt.Chart(data = counties)\n", " .mark_geoshape(\n", " stroke='black',\n", " strokeWidth=1\n", " )\n", " .transform_calculate(state_id = \"(datum.id / 1000)|0\")\n", " .transform_filter((alt.datum.state_id)==36)\n", " .encode(color=alt.value('lightgray'))\n", " .properties(\n", " width=800,\n", " height=640\n", " )\n", ")\n", "\n", "# meter positions on background\n", "points = alt.Chart(df_result.head(500)).mark_circle().encode(\n", " longitude='long:Q',\n", " latitude='lat:Q',\n", " color=alt.value('orange'),\n", " tooltip=['meter_id']\n", ").properties(\n", " title='Power outage in New York'\n", ")\n", "\n", "map_newyork + points" ] } ], "metadata": { "kernelspec": { "display_name": "Meter_demo", "language": "python", "name": "meter_demo" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 }