import boto3 import botocore import logging from crhelper import CfnResource from botocore.exceptions import ClientError logger = logging.getLogger(__name__) sm = boto3.client('sagemaker') # cfnhelper makes it easier to implement a CloudFormation custom resource helper = CfnResource() # CFN Handlers def handler(event, context): helper(event, context) @helper.create def create_handler(event, context): """ Called when CloudFormation custom resource sends the create event """ create_monitoring_schedule(event) @helper.delete def delete_handler(event, context): """ Called when CloudFormation custom resource sends the delete event """ schedule_name = get_schedule_name(event) delete_monitoring_schedule(schedule_name) @helper.poll_create def poll_create(event, context): """ Return true if the resource has been created and false otherwise so CloudFormation polls again. """ schedule_name = get_schedule_name(event) logger.info('Polling for creation of schedule: %s', schedule_name) return is_schedule_ready(schedule_name) @helper.update def noop(): """ Not currently implemented but crhelper will throw an error if it isn't added """ pass # Helper Functions def get_schedule_name(event): return event['ResourceProperties']['ScheduleName'] def create_monitoring_schedule(event): schedule_name = get_schedule_name(event) monitoring_schedule_config = create_monitoring_schedule_config(event) logger.info('Creating monitoring schedule with name: %s', schedule_name) sm.create_monitoring_schedule( MonitoringScheduleName=schedule_name, MonitoringScheduleConfig=monitoring_schedule_config) def is_schedule_ready(schedule_name): is_ready = False schedule = sm.describe_monitoring_schedule(MonitoringScheduleName=schedule_name) status = schedule['MonitoringScheduleStatus'] if status == 'Scheduled': logger.info('Monitoring schedule (%s) is ready', schedule_name) is_ready = True elif status == 'Pending': logger.info('Monitoring schedule (%s) still creating, waiting and polling again...', schedule_name) else: raise Exception('Monitoring schedule ({}) has unexpected status: {}'.format(schedule_name, status)) return is_ready def create_monitoring_schedule_config(event): props = event['ResourceProperties'] return { "ScheduleConfig": { "ScheduleExpression": props["ScheduleExpression"], }, "MonitoringJobDefinition": { "BaselineConfig": { "ConstraintsResource": { "S3Uri": props['BaselineConstraintsUri'], }, "StatisticsResource": { "S3Uri": props['BaselineStatisticsUri'], } }, "MonitoringInputs": [ { "EndpointInput": { "EndpointName": props["EndpointName"], "LocalPath": props["InputLocalPath"], } } ], "MonitoringOutputConfig": { "MonitoringOutputs": [ { "S3Output": { "S3Uri": props["OutputS3URI"], "LocalPath": props["OutputLocalPath"], } } ], }, "MonitoringResources": { "ClusterConfig": { "InstanceCount": 1, "InstanceType": "ml.t3.medium", "VolumeSizeInGB": 50, } }, "MonitoringAppSpecification": { "ImageUri": props["ImageURI"] }, "StoppingCondition": { "MaxRuntimeInSeconds": 300 }, "RoleArn": props["PassRoleArn"], } } def delete_monitoring_schedule(schedule_name): logger.info('Deleting schedule: %s', schedule_name) try: sm.delete_monitoring_schedule(MonitoringScheduleName=schedule_name) except ClientError as e: if e.response['Error']['Code'] == 'ResourceNotFound': logger.info('Resource not found, nothing to delete') else: logger.error('Unexpected error while trying to delete monitoring schedule') raise e