## Module 0: Setup Notebook

This notebook is the starting point for the entire workshop. It sets up the dataset and defines the parameters that are used in the rest of the modules.

By separating the dataset loading and parameter definition from the rest of the modules, each module can be self-contained, making it easier to modify and extend the codebase as the workshop progresses. 

**You can run the entire workshop in sequence, or run this notebook first and then pick and choose the modules you want to run individually.**

Install and/or update the required libraries.

In [None]:
!pip install -Uq pip --quiet

!pip install -Uq awswrangler sagemaker boto3 --quiet

### Imports & Global Variables

In [None]:
import boto3
import sagemaker
import pandas as pd
import json

sagemaker_session = sagemaker.Session()

region = sagemaker_session.boto_region_name
sagemaker_role = sagemaker.get_execution_role()

bucket = sagemaker_session.default_bucket()

s3_client = boto3.client("s3", region_name=region)
sagemaker_client = boto3.client("sagemaker")

prefix = "telco-5g-observability"

fg_name = "5gcell-anomaly-features"

%store region
%store bucket
%store sagemaker_role
%store prefix
%store fg_name

## Setup Raw Data

In [None]:
df = pd.read_csv("data/5gcell.csv")

cell5g_s3_key = f"{prefix}/data/raw/5gcell.csv"

s3_client.upload_file(
 Filename="data/5gcell.csv", Bucket=bucket, Key=cell5g_s3_key
)

raw_data_s3 = f's3://{bucket}/{cell5g_s3_key}'

print(f'Raw 5g dataset is located at {raw_data_s3}')

%store raw_data_s3

## Create a Feature Group

In [None]:
## clean out left over feature groups
# `time` is imported in this cell because it runs before the later cell that
# imports it; the polling loop below must not depend on cell execution order.
import time

try:
    sagemaker_client.delete_feature_group(FeatureGroupName=fg_name)
except sagemaker_client.exceptions.ResourceNotFound:
    # No leftover feature group: nothing to clean up.
    print(f"{fg_name} is not available. Create New.")
except Exception as e:
    # Best-effort cleanup: report the real error instead of hiding it, and
    # let the notebook continue.
    print(f"Could not delete feature group {fg_name}: {e}")
else:
    # The delete request was accepted. Poll (up to ~5 minutes) until the group
    # can no longer be described, so the create step that follows does not
    # collide with a group that is still being deleted.
    deleted = False
    for _ in range(60):
        try:
            sagemaker_client.describe_feature_group(FeatureGroupName=fg_name)
        except sagemaker_client.exceptions.ResourceNotFound:
            deleted = True
            break
        time.sleep(5)

    if deleted:
        print(f"Delete exisiting feature group: {fg_name}.")
    else:
        print(f"Feature group {fg_name} is still being deleted; wait before creating it again.")

In [None]:
from sagemaker.feature_store.feature_group import FeatureGroup

# Records that will populate the feature group.
df_fg = pd.read_csv("data/feature_store.csv")

# Every record gets the same event time: the current instant as epoch seconds.
# A timezone-aware UTC "now" is used so the value does not depend on the
# host's local timezone or on how pandas interprets a naive "now".
timestamp = pd.Timestamp.now(tz="UTC").timestamp()
df_fg["EventTime"] = timestamp

anomaly_feature_group = FeatureGroup(name=fg_name, sagemaker_session=sagemaker_session)

# Derive the feature definitions from the DataFrame (including EventTime).
anomaly_feature_group.load_feature_definitions(data_frame=df_fg)

# NOTE(review): assumes data/feature_store.csv has a `location_id` column to
# serve as the record identifier - confirm against the data file.
anomaly_feature_group.create(
    s3_uri=f"s3://{bucket}/{prefix}",
    record_identifier_name="location_id",
    event_time_feature_name="EventTime",
    role_arn=sagemaker_role,
)

In [None]:
import time


def wait_for_feature_group_creation_complete(feature_group):
    """Block until ``feature_group`` is no longer in the "Creating" state.

    The status is read from ``feature_group.describe()["FeatureGroupStatus"]``
    and re-checked every 5 seconds while it equals "Creating".

    Args:
        feature_group: Object exposing ``describe()`` (returning a mapping with
            a "FeatureGroupStatus" entry) and a ``name`` attribute.

    Raises:
        RuntimeError: If the status after waiting is anything but "Created".
    """
    while True:
        current_status = feature_group.describe().get("FeatureGroupStatus")
        if current_status != "Creating":
            break
        print("Waiting for Feature Group Creation")
        time.sleep(5)

    # Any terminal state other than "Created" is treated as a failure.
    if current_status != "Created":
        raise RuntimeError(f"Failed to create feature group {feature_group.name}")

    print(f"FeatureGroup {feature_group.name} successfully created.")


# Block here until the feature group has left the "Creating" state.
wait_for_feature_group_creation_complete(anomaly_feature_group)

In [None]:
# Ingest every row of df_fg into the feature group using 3 workers, and wait
# for ingestion to finish before the cell returns.
anomaly_feature_group.ingest(
    data_frame=df_fg,
    max_workers=3,
    wait=True,
)