{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Feature Groups in SageMaker\n",
    "\n",
    "Creates the `users`, `places` and `ratings` feature groups for trials 4a and 4b, after deleting any existing feature groups whose names start with the same prefixes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "import sagemaker\n",
    "from sagemaker.feature_store.feature_group import FeatureDefinition, FeatureGroup, FeatureTypeEnum, DataCatalogConfig\n",
    "from sagemaker.feature_store.inputs import TableFormatEnum\n",
    "\n",
    "sagemaker_session = sagemaker.Session()\n",
    "\n",
    "# To avoid duplicates between the 4a and 4b experiments, each trial gets its own set of feature groups.\n",
    "feature_group_prefix_4a = 'redshift-sm-demo-4a-'\n",
    "feature_group_prefix_4b = 'redshift-sm-demo-4b-'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Delete existing feature groups whose name starts with `feature_group_prefix`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _wait_for_feature_group_deletion(sm_client, feature_group_name, poll_interval_seconds):\n",
    "    \"\"\"Block until the feature group `feature_group_name` no longer exists.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if the feature group reports the `DeleteFailed` status.\n",
    "    \"\"\"\n",
    "    while True:\n",
    "        try:\n",
    "            description = sm_client.describe_feature_group(FeatureGroupName=feature_group_name)\n",
    "        except sm_client.exceptions.ResourceNotFound:\n",
    "            # The feature group is gone, so its name can be reused.\n",
    "            return\n",
    "        if description.get(\"FeatureGroupStatus\") == \"DeleteFailed\":\n",
    "            raise RuntimeError(\n",
    "                f\"Deleting FeatureGroup {feature_group_name} failed: \"\n",
    "                f\"{description.get('FailureReason', 'no failure reason reported')}\"\n",
    "            )\n",
    "        time.sleep(poll_interval_seconds)\n",
    "\n",
    "\n",
    "def delete_existing_feature_group(feature_group_prefix, poll_interval_seconds=5):\n",
    "    \"\"\"Delete every feature group whose name starts with `feature_group_prefix`.\n",
    "\n",
    "    Deletion is asynchronous on the service side, so this waits until each matching\n",
    "    feature group has actually disappeared. Otherwise re-creating a feature group\n",
    "    with the same name right afterwards can fail.\n",
    "\n",
    "    Args:\n",
    "        feature_group_prefix: name prefix selecting the feature groups to delete.\n",
    "        poll_interval_seconds: seconds to sleep between deletion status checks.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if a feature group ends up in the `DeleteFailed` status.\n",
    "    \"\"\"\n",
    "    sm_client = sagemaker_session.sagemaker_client\n",
    "    # Paginate: a single list_feature_groups call only returns the first page of results.\n",
    "    paginator = sm_client.get_paginator(\"list_feature_groups\")\n",
    "    matches = [\n",
    "        summary\n",
    "        for page in paginator.paginate(NameContains=feature_group_prefix)\n",
    "        for summary in page[\"FeatureGroupSummaries\"]\n",
    "        # NameContains matches anywhere in the name; keep only true prefix matches.\n",
    "        if summary[\"FeatureGroupName\"].startswith(feature_group_prefix)\n",
    "    ]\n",
    "\n",
    "    # Request all deletions first so they proceed in parallel, then wait for each one.\n",
    "    for summary in matches:\n",
    "        # A group that is already being deleted only needs to be waited for.\n",
    "        if summary.get(\"FeatureGroupStatus\") != \"Deleting\":\n",
    "            sagemaker_session.delete_feature_group(summary[\"FeatureGroupName\"])\n",
    "\n",
    "    for summary in matches:\n",
    "        name = summary[\"FeatureGroupName\"]\n",
    "        _wait_for_feature_group_deletion(sm_client, name, poll_interval_seconds)\n",
    "        print(f\"FeatureGroupName - {name} - Deleted\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "delete_existing_feature_group(feature_group_prefix_4a)\n",
    "delete_existing_feature_group(feature_group_prefix_4b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Helpers to create a feature group and wait until it is ready"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_feature_group(feature_group_prefix, group_name, feature_group_fds, record_identifier_name, event_time_feature_name):\n",
    "    \"\"\"Start the creation of the feature group named `feature_group_prefix + group_name`.\n",
    "\n",
    "    The online store is enabled, the offline store is placed under the session's default\n",
    "    bucket in a folder named after the feature group, and Glue table creation is left enabled.\n",
    "    This does not wait for the creation to finish; call `check_feature_group_status` for that.\n",
    "\n",
    "    Args:\n",
    "        feature_group_prefix: prefix prepended to `group_name`.\n",
    "        group_name: suffix of the feature group name, e.g. \"users\".\n",
    "        feature_group_fds: list of `FeatureDefinition` objects describing the features.\n",
    "        record_identifier_name: name of the feature used as the record identifier.\n",
    "        event_time_feature_name: name of the feature holding the event time.\n",
    "\n",
    "    Returns:\n",
    "        The `FeatureGroup` object on which `create` was called.\n",
    "    \"\"\"\n",
    "    feature_group_name = feature_group_prefix + group_name\n",
    "    feature_group = FeatureGroup(\n",
    "        name=feature_group_name,\n",
    "        sagemaker_session=sagemaker_session,\n",
    "        feature_definitions=feature_group_fds,\n",
    "    )\n",
    "    feature_group.create(\n",
    "        s3_uri=f\"s3://{sagemaker_session.default_bucket()}/{feature_group_name}\",\n",
    "        record_identifier_name=record_identifier_name,\n",
    "        event_time_feature_name=event_time_feature_name,\n",
    "        enable_online_store=True,\n",
    "        role_arn=sagemaker.get_execution_role(sagemaker_session),\n",
    "        disable_glue_table_creation=False,\n",
    "    )\n",
    "    return feature_group\n",
    "\n",
    "\n",
    "def check_feature_group_status(feature_group, poll_interval_seconds=5):\n",
    "    \"\"\"Wait until `feature_group` has left the `Creating` status and verify that it was created.\n",
    "\n",
    "    Args:\n",
    "        feature_group: the `FeatureGroup` to poll.\n",
    "        poll_interval_seconds: seconds to sleep between status checks.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if the final status is anything other than `Created`\n",
    "            (for example `CreateFailed`).\n",
    "    \"\"\"\n",
    "    description = feature_group.describe()\n",
    "    while description.get(\"FeatureGroupStatus\") == \"Creating\":\n",
    "        print(\"Waiting for Feature Group to be Created\")\n",
    "        time.sleep(poll_interval_seconds)\n",
    "        description = feature_group.describe()\n",
    "\n",
    "    status = description.get(\"FeatureGroupStatus\")\n",
    "    if status != \"Created\":\n",
    "        # Do not report success for CreateFailed or any other unexpected status.\n",
    "        raise RuntimeError(\n",
    "            f\"FeatureGroup {feature_group.name} is in status {status}: \"\n",
    "            f\"{description.get('FailureReason', 'no failure reason reported')}\"\n",
    "        )\n",
    "    print(f\"FeatureGroup {feature_group.name} successfully created.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Feature group setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): user_latitude / user_longitude are STRING here while place_latitude /\n",
    "# place_longitude are FRACTIONAL in places_fds - confirm this matches the ingested data.\n",
    "users_fds = [\n",
    "    FeatureDefinition(feature_name=\"userid\", feature_type=FeatureTypeEnum.STRING),\n",
    "    FeatureDefinition(feature_name=\"user_drink_level\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_smoker\", feature_type=FeatureTypeEnum.STRING),\n",
    "    FeatureDefinition(feature_name=\"user_budget\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_latitude\", feature_type=FeatureTypeEnum.STRING),\n",
    "    FeatureDefinition(feature_name=\"user_longitude\", feature_type=FeatureTypeEnum.STRING),\n",
    "    FeatureDefinition(feature_name=\"user_transport\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_interest_VARIETY\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_interest_ECO-FRIENDLY\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_interest_RETRO\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_interest_TECHNOLOGY\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_interest_NONE\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_personality_CONFORMIST\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_personality_THRIFTY-PROTECTOR\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_personality_HUNTER-OSTENTATIOUS\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"user_personality_HARD-WORKER\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"timestamp\", feature_type=FeatureTypeEnum.STRING)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "places_fds = [\n",
    "    FeatureDefinition(feature_name=\"placeid\", feature_type=FeatureTypeEnum.INTEGRAL),\n",
    "    FeatureDefinition(feature_name=\"place_latitude\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"place_longitude\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"place_smoking_area\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"place_alcohol\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"place_price\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"place_parking_lot\", feature_type=FeatureTypeEnum.FRACTIONAL),\n",
    "    FeatureDefinition(feature_name=\"timestamp\", feature_type=FeatureTypeEnum.STRING)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): placeid is STRING here but INTEGRAL in places_fds - confirm which type\n",
    "# the ingested data uses before joining the two feature groups on it.\n",
    "ratings_fds = [\n",
    "    FeatureDefinition(feature_name=\"userid\", feature_type=FeatureTypeEnum.STRING),\n",
    "    FeatureDefinition(feature_name=\"ratingid\", feature_type=FeatureTypeEnum.STRING),\n",
    "    FeatureDefinition(feature_name=\"placeid\", feature_type=FeatureTypeEnum.STRING),\n",
    "    FeatureDefinition(feature_name=\"rating_overall\", feature_type=FeatureTypeEnum.INTEGRAL),\n",
    "    FeatureDefinition(feature_name=\"timestamp\", feature_type=FeatureTypeEnum.STRING)\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create the feature groups for trials 4a and 4b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_trial_feature_groups(feature_group_prefix):\n",
    "    \"\"\"Create the users, places and ratings feature groups for one trial and wait for all of them.\n",
    "\n",
    "    All three creations are started before waiting, so they proceed in parallel.\n",
    "\n",
    "    Returns:\n",
    "        A `(users, places, ratings)` tuple of `FeatureGroup` objects.\n",
    "    \"\"\"\n",
    "    feature_groups = (\n",
    "        create_feature_group(feature_group_prefix, \"users\", users_fds, \"userid\", \"timestamp\"),\n",
    "        create_feature_group(feature_group_prefix, \"places\", places_fds, \"placeid\", \"timestamp\"),\n",
    "        create_feature_group(feature_group_prefix, \"ratings\", ratings_fds, \"ratingid\", \"timestamp\"),\n",
    "    )\n",
    "    for feature_group in feature_groups:\n",
    "        check_feature_group_status(feature_group)\n",
    "    return feature_groups\n",
    "\n",
    "\n",
    "def print_offline_table_names(*feature_groups):\n",
    "    \"\"\"Print the data catalog table name of the offline store of each given feature group.\"\"\"\n",
    "    for feature_group in feature_groups:\n",
    "        print(feature_group.describe()['OfflineStoreConfig']['DataCatalogConfig']['TableName'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Create the feature groups for trial 4a and wait until they are ready\n",
    "users_feature_group_4a, places_feature_group_4a, ratings_feature_group_4a = create_trial_feature_groups(feature_group_prefix_4a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "print_offline_table_names(users_feature_group_4a, places_feature_group_4a, ratings_feature_group_4a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Create the feature groups for trial 4b and wait until they are ready\n",
    "users_feature_group_4b, places_feature_group_4b, ratings_feature_group_4b = create_trial_feature_groups(feature_group_prefix_4b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "print_offline_table_names(users_feature_group_4b, places_feature_group_4b, ratings_feature_group_4b)"
   ]
  }
 ],
 "metadata": { "availableInstances": [ { "_defaultOrder": 0, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.t3.medium", "vcpuNum": 2 }, { "_defaultOrder": 1, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.t3.large", "vcpuNum": 2 }, { "_defaultOrder": 2, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.t3.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 3, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.t3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 4, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5.large", "vcpuNum": 2 }, { "_defaultOrder": 5, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 6, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 7, "_isFastLaunch": false, "category":
"General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 8, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 9, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 10, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 11, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 12, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5d.large", "vcpuNum": 2 }, { "_defaultOrder": 13, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5d.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 14, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5d.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 15, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5d.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 16, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5d.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 17, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5d.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 18, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, 
"hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5d.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 19, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 20, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": true, "memoryGiB": 0, "name": "ml.geospatial.interactive", "supportedImageNames": [ "sagemaker-geospatial-v1-0" ], "vcpuNum": 0 }, { "_defaultOrder": 21, "_isFastLaunch": true, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.c5.large", "vcpuNum": 2 }, { "_defaultOrder": 22, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.c5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 23, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.c5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 24, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.c5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 25, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 72, "name": "ml.c5.9xlarge", "vcpuNum": 36 }, { "_defaultOrder": 26, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 96, "name": "ml.c5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 27, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 144, "name": "ml.c5.18xlarge", "vcpuNum": 72 }, { "_defaultOrder": 28, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.c5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 29, "_isFastLaunch": true, 
"category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g4dn.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 30, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g4dn.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 31, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g4dn.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 32, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g4dn.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 33, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g4dn.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 34, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g4dn.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 35, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 61, "name": "ml.p3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 36, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 244, "name": "ml.p3.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 37, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 488, "name": "ml.p3.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 38, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.p3dn.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 39, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.r5.large", "vcpuNum": 2 }, { 
"_defaultOrder": 40, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.r5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 41, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.r5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 42, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.r5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 43, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.r5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 44, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.r5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 45, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 512, "name": "ml.r5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 46, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.r5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 47, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 48, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 49, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 50, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g5.8xlarge", "vcpuNum": 32 }, { 
"_defaultOrder": 51, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 52, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 53, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.g5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 54, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.g5.48xlarge", "vcpuNum": 192 }, { "_defaultOrder": 55, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 1152, "name": "ml.p4d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 56, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 1152, "name": "ml.p4de.24xlarge", "vcpuNum": 96 } ], "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data Science)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }