{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "2af41bc8-6b9e-4b4d-93de-12fb94a89c40", "metadata": { "tags": [] }, "outputs": [], "source": [ " %load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "id": "278b6957-bf12-4bdb-b529-f2b3e9c2e738", "metadata": { "tags": [] }, "outputs": [], "source": [ "import sys\n", "sys.path.insert(0, '/root/sagemaker_ml_insights_repo/notebooks/')" ] }, { "cell_type": "code", "execution_count": null, "id": "7876b78a-f1a1-4ea8-bb5a-3b7f5abcfaf5", "metadata": { "tags": [] }, "outputs": [], "source": [ "import pyathena\n", "from pyathena.pandas.cursor import PandasCursor\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as mdates\n", "from fastprogress import progress_bar\n", "from tqdm.notebook import tqdm" ] }, { "cell_type": "code", "execution_count": null, "id": "6a0e917e-b1a8-4394-a46c-4467d6deb331", "metadata": { "tags": [] }, "outputs": [], "source": [ "import mlsec.utils as mlsecutils\n", "from mlsec.tsat import *" ] }, { "cell_type": "code", "execution_count": null, "id": "45c65faf-e342-4ae8-846a-dbd2ced4177d", "metadata": { "tags": [] }, "outputs": [], "source": [ "mlsecutils.set_pandas_colwidth(500)" ] }, { "cell_type": "code", "execution_count": null, "id": "1c224f81-39bb-4e35-9ef6-4a19e081a98a", "metadata": { "tags": [] }, "outputs": [], "source": [ "con = pyathena.connect(region_name='us-east-1',\n", " work_group='security_lake_insights',\n", " cursor_class=PandasCursor).cursor()" ] }, { "cell_type": "code", "execution_count": null, "id": "b8da4858-aecc-436f-a0cf-7d7a0d71d61a", "metadata": { "tags": [] }, "outputs": [], "source": [ "TABLE = 'amazon_security_lake_glue_db_us_east_1.amazon_security_lake_table_us_east_1_sh_findings_1_0'" ] }, { "attachments": {}, "cell_type": "markdown", "id": "ffd985f7-cc8b-46c4-b889-cd3e2d2cfdc9", "metadata": {}, "source": [ "## Query Security Hub Findings data in Security Lake using Athena Connection" ] }, { "cell_type": "code", "execution_count": null, "id": "fe2235a1-7091-467d-a8c1-33bc1dad3bb6", "metadata": { "tags": [] }, "outputs": [], "source": [ "## Pull data and aggregate by Product|Finding and date\n", "## CONVENTION: ALWAYS HAVE \"timestamp\" and \"value\" (frequency) fields in the select statement\n", "\n", "sql = f\"\"\"\n", "select productname || ' | ' || types[1] as finding_type\n", " ,timestamp\n", " ,count(*) as value\n", "from\n", "(\n", "select metadata.product.feature.name as ProductName,\n", "finding.types as types,\n", "cast(from_unixtime(time/1000) as timestamp) as timestamp\n", "FROM {TABLE}\n", "WHERE eventDay > '20220101'\n", ") tmp\n", "group by 1, 2\n", "\"\"\"\n", "\n", "keys = ['finding_type']" ] }, { "cell_type": "code", "execution_count": null, "id": "eb5b29f7-6da4-4d90-95b7-293dfd0a4a2a", "metadata": { "tags": [] }, "outputs": [], "source": [ "results = con.execute(sql).as_pandas()" ] }, { "cell_type": "code", "execution_count": null, "id": "7c44b6f9-250c-4f3b-b0aa-0f3b5c724c31", "metadata": { "tags": [] }, "outputs": [], "source": [ " results.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "8a7cdcca-9ab4-4cf9-b4c4-2353723fecbd", "metadata": { "tags": [] }, "outputs": [], "source": [ "results.head(5)" ] }, { "attachments": {}, "cell_type": "markdown", "id": "67020cb5-cc91-4fcb-8120-e7f654aa31b9", "metadata": {}, "source": [ "## Split results data into timeseries dataset" ] }, { "cell_type": "code", "execution_count": null, "id": "fc99a1ef-34ee-4dc5-ac69-4855e74282f1", "metadata": { "tags": [] }, "outputs": [], "source": [ " def df_to_ts(df):\n", " ts = pd.Series(df['value'])\n", " ts.index = pd.to_datetime(df['timestamp'])\n", " ts = ts.resample('D').sum().fillna(0)\n", " return ts" ] }, { "cell_type": "code", "execution_count": null, "id": "9611be6b-f41e-48af-b6d4-cef1f509d35e", "metadata": { "tags": [] }, "outputs": [], "source": [ " def split_sql_results_into_timeseries(sql_results, keys):\n", " timeseries = []\n", " for _, df in sql_results.groupby(keys, as_index=False):\n", " ts = df_to_ts(df)\n", " keyvals = {}\n", " for key in keys:\n", " keyvals[key] = df.iloc[0][key]\n", " timeseries.append((keyvals, ts))\n", "# timeseries = [({split_key: df.iloc[0][split_key]}, df_to_ts(df)) for _, df in results.groupby(keys, as_index=False)]\n", " return timeseries" ] }, { "cell_type": "code", "execution_count": null, "id": "57a068c7-4ba8-47cc-805b-8548c519c747", "metadata": { "tags": [] }, "outputs": [], "source": [ " timeseries = split_sql_results_into_timeseries(results, keys)" ] }, { "cell_type": "code", "execution_count": null, "id": "e33fec57-82e8-4068-bea2-c08ff3e427b3", "metadata": { "tags": [] }, "outputs": [], "source": [ " len(timeseries)" ] }, { "cell_type": "code", "execution_count": null, "id": "6827165f-410c-4a05-bd69-dbe3af761f9a", "metadata": { "tags": [] }, "outputs": [], "source": [ "timeseries = [(key, ts) for key, ts in timeseries if len(ts) > 5]" ] }, { "cell_type": "code", "execution_count": null, "id": "221109d9-e155-47f1-97a4-3e2efc0c62fe", "metadata": { "tags": [] }, "outputs": [], "source": [ "len(timeseries)" ] }, { "cell_type": "code", "execution_count": null, "id": "3b06b1d4-e5ab-4b6f-a166-59da3e3be92e", "metadata": { "tags": [] }, "outputs": [], "source": [ " milo_tss = [MiloTimeseries(ts=ts, source_sql=sql, keyvals=keyvals) for keyvals,ts in timeseries]" ] }, { "cell_type": "code", "execution_count": null, "id": "6a350268-8023-4d4b-8bf7-86d4532b0f95", "metadata": { "tags": [] }, "outputs": [], "source": [ " milo_tss[0].keyvals" ] }, { "cell_type": "code", "execution_count": null, "id": "c23dc38f-a32f-4787-97dc-8d5f713d7ebc", "metadata": { "tags": [] }, "outputs": [], "source": [ " %store milo_tss" ] }, { "cell_type": "code", "execution_count": null, "id": "f0b0b332-78af-4284-a9c9-70b49223060a", "metadata": { "tags": [] }, "outputs": [], "source": [ "print(\"source_sql\\n\", milo_tss[0].source_sql)\n", "print(\"_wheres\\n\", milo_tss[0]._wheres)\n", "print(\"_selects\\n\", milo_tss[0]._selects)" ] }, { "cell_type": "code", "execution_count": null, "id": "ae3f4826-d783-4dc9-bea0-d28a8966c0cf", "metadata": { "tags": [] }, "outputs": [], "source": [ "for milo_ts in milo_tss:\n", " milo_ts.plot()\n", " plt.show()" ] } ], "metadata": { "availableInstances": [ { "_defaultOrder": 0, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.t3.medium", "vcpuNum": 2 }, { "_defaultOrder": 1, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.t3.large", "vcpuNum": 2 }, { "_defaultOrder": 2, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.t3.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 3, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.t3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 4, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5.large", "vcpuNum": 2 }, { "_defaultOrder": 5, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 6, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 7, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 8, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 9, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 10, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 11, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 12, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5d.large", "vcpuNum": 2 }, { "_defaultOrder": 13, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5d.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 14, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5d.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 15, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5d.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 16, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5d.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 17, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5d.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 18, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5d.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 19, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 20, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": true, "memoryGiB": 0, "name": "ml.geospatial.interactive", "supportedImageNames": [ "sagemaker-geospatial-v1-0" ], "vcpuNum": 0 }, { "_defaultOrder": 21, "_isFastLaunch": true, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.c5.large", "vcpuNum": 2 }, { "_defaultOrder": 22, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.c5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 23, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.c5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 24, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.c5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 25, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 72, "name": "ml.c5.9xlarge", "vcpuNum": 36 }, { "_defaultOrder": 26, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 96, "name": "ml.c5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 27, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 144, "name": "ml.c5.18xlarge", "vcpuNum": 72 }, { "_defaultOrder": 28, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.c5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 29, "_isFastLaunch": true, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g4dn.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 30, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g4dn.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 31, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g4dn.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 32, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g4dn.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 33, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g4dn.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 34, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g4dn.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 35, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 61, "name": "ml.p3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 36, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 244, "name": "ml.p3.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 37, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 488, "name": "ml.p3.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 38, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.p3dn.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 39, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.r5.large", "vcpuNum": 2 }, { "_defaultOrder": 40, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.r5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 41, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.r5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 42, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.r5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 43, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.r5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 44, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.r5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 45, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 512, "name": "ml.r5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 46, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.r5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 47, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 48, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 49, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 50, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 51, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 52, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 53, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.g5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 54, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.g5.48xlarge", "vcpuNum": 192 } ], "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data Science)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }