#### Create Feature Groups in SageMaker

In [None]:
import sagemaker
from sagemaker.feature_store.feature_group import FeatureDefinition, FeatureGroup, FeatureTypeEnum, DataCatalogConfig
from sagemaker.feature_store.inputs import TableFormatEnum
import time
# SageMaker session shared by the helper functions defined below.
sagemaker_session = sagemaker.Session()

# To avoid duplicates between the 4a and 4b experiments, each trial gets its
# own set of feature groups, distinguished by the name prefixes below.

feature_group_prefix_4a = 'redshift-sm-demo-4a-'
feature_group_prefix_4b = 'redshift-sm-demo-4b-'

#### Delete existing feature groups whose names start with a feature group prefix

In [None]:
def delete_existing_feature_group(feature_group_prefix):
    """Delete every feature group whose name starts with ``feature_group_prefix``.

    The listing API is paginated, so a single ``list_feature_groups`` call can
    miss matching groups that fall on later pages. This walks every page
    through a paginator before issuing the deletions.

    Args:
        feature_group_prefix: Name prefix; every group whose
            ``FeatureGroupName`` starts with it is deleted.
    """
    sagemaker_client = sagemaker_session.boto_session.client(
        'sagemaker', region_name=sagemaker_session.boto_region_name
    )
    paginator = sagemaker_client.get_paginator('list_feature_groups')

    # Gather all matching names first so that no delete request is issued
    # while the listing is still being paged through.
    matching_names = [
        summary['FeatureGroupName']
        for page in paginator.paginate()
        for summary in page['FeatureGroupSummaries']
        if summary['FeatureGroupName'].startswith(feature_group_prefix)
    ]

    for name in matching_names:
        sagemaker_session.delete_feature_group(name)
        print(f"FeatureGroupName - {name} - Deleted")
            
# Remove feature groups left over from earlier runs: trial 4a first, then 4b.
for _stale_prefix in (feature_group_prefix_4a, feature_group_prefix_4b):
    delete_existing_feature_group(_stale_prefix)

In [None]:
def create_feature_group(
    feature_group_prefix,
    group_name,
    feature_group_fds,
    record_identifier_name,
    event_time_feature_name,
):
    """Request creation of the feature group ``feature_group_prefix + group_name``.

    The group is created with the online store enabled, Glue table creation
    left enabled, and ``s3://<session default bucket>/<feature group name>``
    passed as ``s3_uri``.

    Args:
        feature_group_prefix: Prefix prepended to ``group_name``.
        group_name: Suffix identifying the group (e.g. "users").
        feature_group_fds: Feature definitions passed to ``FeatureGroup``.
        record_identifier_name: Name of the record identifier feature.
        event_time_feature_name: Name of the event time feature.

    Returns:
        The ``FeatureGroup`` object on which ``create`` was called.
    """
    full_name = feature_group_prefix + group_name
    group = FeatureGroup(
        name=full_name,
        sagemaker_session=sagemaker_session,
        feature_definitions=feature_group_fds,
    )

    creation_options = {
        "s3_uri": f"s3://{sagemaker_session.default_bucket()}/{full_name}",
        "record_identifier_name": record_identifier_name,
        "event_time_feature_name": event_time_feature_name,
        "enable_online_store": True,
        "role_arn": sagemaker.get_execution_role(sagemaker_session),
        "disable_glue_table_creation": False,
    }
    group.create(**creation_options)
    return group

def check_feature_group_status(feature_group, poll_interval=5):
    """Block until ``feature_group`` leaves the "Creating" state.

    Polls ``feature_group.describe()`` until the reported status is no longer
    "Creating". Success is only reported when the final status is "Created";
    any other final status (for example "CreateFailed") raises instead of
    being printed as a success.

    Args:
        feature_group: Object exposing ``describe()`` (returning a mapping
            with a "FeatureGroupStatus" entry) and a ``name`` attribute.
        poll_interval: Seconds to sleep between status polls.

    Raises:
        RuntimeError: If the final status is anything other than "Created".
    """
    description = feature_group.describe()
    status = description.get("FeatureGroupStatus")
    while status == "Creating":
        print("Waiting for Feature Group to be Created")
        time.sleep(poll_interval)
        description = feature_group.describe()
        status = description.get("FeatureGroupStatus")

    if status != "Created":
        # The loop also ends on failure states, so check the outcome explicitly.
        raise RuntimeError(
            f"FeatureGroup {feature_group.name} was not created: "
            f"status={status}, reason={description.get('FailureReason')}"
        )
    print(f"FeatureGroup {feature_group.name} successfully created.")

#### Feature group setup

In [None]:
# Feature definitions for the "users" feature group, as (name, type) pairs in
# schema order.
# NOTE(review): user_latitude / user_longitude are STRING here, while
# place_latitude / place_longitude in places_fds are FRACTIONAL - confirm
# this matches the data being ingested.
users_fds = [
    FeatureDefinition(feature_name=name, feature_type=kind)
    for name, kind in (
        ("userid", FeatureTypeEnum.STRING),
        ("user_drink_level", FeatureTypeEnum.FRACTIONAL),
        ("user_smoker", FeatureTypeEnum.STRING),
        ("user_budget", FeatureTypeEnum.FRACTIONAL),
        ("user_latitude", FeatureTypeEnum.STRING),
        ("user_longitude", FeatureTypeEnum.STRING),
        ("user_transport", FeatureTypeEnum.FRACTIONAL),
        ("user_interest_VARIETY", FeatureTypeEnum.FRACTIONAL),
        ("user_interest_ECO-FRIENDLY", FeatureTypeEnum.FRACTIONAL),
        ("user_interest_RETRO", FeatureTypeEnum.FRACTIONAL),
        ("user_interest_TECHNOLOGY", FeatureTypeEnum.FRACTIONAL),
        ("user_interest_NONE", FeatureTypeEnum.FRACTIONAL),
        ("user_personality_CONFORMIST", FeatureTypeEnum.FRACTIONAL),
        ("user_personality_THRIFTY-PROTECTOR", FeatureTypeEnum.FRACTIONAL),
        ("user_personality_HUNTER-OSTENTATIOUS", FeatureTypeEnum.FRACTIONAL),
        ("user_personality_HARD-WORKER", FeatureTypeEnum.FRACTIONAL),
        ("timestamp", FeatureTypeEnum.STRING),
    )
]



In [None]:
# Feature definitions for the "places" feature group; the mapping's insertion
# order fixes the order of the resulting list.
places_fds = [
    FeatureDefinition(feature_name=name, feature_type=kind)
    for name, kind in {
        "placeid": FeatureTypeEnum.INTEGRAL,
        "place_latitude": FeatureTypeEnum.FRACTIONAL,
        "place_longitude": FeatureTypeEnum.FRACTIONAL,
        "place_smoking_area": FeatureTypeEnum.FRACTIONAL,
        "place_alcohol": FeatureTypeEnum.FRACTIONAL,
        "place_price": FeatureTypeEnum.FRACTIONAL,
        "place_parking_lot": FeatureTypeEnum.FRACTIONAL,
        "timestamp": FeatureTypeEnum.STRING,
    }.items()
]



In [None]:
# Feature definitions for the "ratings" feature group, as (name, type) pairs
# in schema order.
# NOTE(review): placeid is STRING here but INTEGRAL in places_fds - confirm
# the mismatch is intended.
ratings_fds = [
    FeatureDefinition(feature_name=name, feature_type=kind)
    for name, kind in (
        ("userid", FeatureTypeEnum.STRING),
        ("ratingid", FeatureTypeEnum.STRING),
        ("placeid", FeatureTypeEnum.STRING),
        ("rating_overall", FeatureTypeEnum.INTEGRAL),
        ("timestamp", FeatureTypeEnum.STRING),
    )
]



In [None]:
# Create the users / places / ratings feature groups for trial 4a.
users_feature_group_4a = create_feature_group(
    feature_group_prefix=feature_group_prefix_4a,
    group_name="users",
    feature_group_fds=users_fds,
    record_identifier_name="userid",
    event_time_feature_name="timestamp",
)
places_feature_group_4a = create_feature_group(
    feature_group_prefix=feature_group_prefix_4a,
    group_name="places",
    feature_group_fds=places_fds,
    record_identifier_name="placeid",
    event_time_feature_name="timestamp",
)
ratings_feature_group_4a = create_feature_group(
    feature_group_prefix=feature_group_prefix_4a,
    group_name="ratings",
    feature_group_fds=ratings_fds,
    record_identifier_name="ratingid",
    event_time_feature_name="timestamp",
)

# Wait for each trial 4a feature group in turn: users, places, ratings.
for _pending_group_4a in (
    users_feature_group_4a,
    places_feature_group_4a,
    ratings_feature_group_4a,
):
    check_feature_group_status(_pending_group_4a)

In [None]:
# Print the offline store DataCatalogConfig table name of each trial 4a
# feature group, one per line (users, places, ratings).
print(
    *(
        group.describe()['OfflineStoreConfig']['DataCatalogConfig']['TableName']
        for group in (
            users_feature_group_4a,
            places_feature_group_4a,
            ratings_feature_group_4a,
        )
    ),
    sep="\n",
)

In [None]:
# Create the users / places / ratings feature groups for trial 4b.
users_feature_group_4b = create_feature_group(
    feature_group_prefix=feature_group_prefix_4b,
    group_name="users",
    feature_group_fds=users_fds,
    record_identifier_name="userid",
    event_time_feature_name="timestamp",
)
places_feature_group_4b = create_feature_group(
    feature_group_prefix=feature_group_prefix_4b,
    group_name="places",
    feature_group_fds=places_fds,
    record_identifier_name="placeid",
    event_time_feature_name="timestamp",
)
ratings_feature_group_4b = create_feature_group(
    feature_group_prefix=feature_group_prefix_4b,
    group_name="ratings",
    feature_group_fds=ratings_fds,
    record_identifier_name="ratingid",
    event_time_feature_name="timestamp",
)


# Wait for each trial 4b feature group in turn: users, places, ratings.
for _pending_group_4b in (
    users_feature_group_4b,
    places_feature_group_4b,
    ratings_feature_group_4b,
):
    check_feature_group_status(_pending_group_4b)

In [None]:
# Print the offline store DataCatalogConfig table name of each trial 4b
# feature group, one per line (users, places, ratings).
print(
    *(
        group.describe()['OfflineStoreConfig']['DataCatalogConfig']['TableName']
        for group in (
            users_feature_group_4b,
            places_feature_group_4b,
            ratings_feature_group_4b,
        )
    ),
    sep="\n",
)