# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

import json
import datetime
import ast
import os

import boto3


def lambda_handler(event, context):
    print("input event::")
    print(event)

    codecommit = boto3.client('codecommit')
    s3 = boto3.client('s3')
    glue = boto3.client('glue')

    # Resolve the repository that fired the trigger and record its default branch.
    repositoryName, os.environ['branch'] = get_repository_details(event, codecommit)

    # Read the deployment configuration committed alongside the code.
    trigger_config = get_trigger_config(codecommit, repositoryName)

    # Copy the CodeCommit repository files to S3, tracking which objects are
    # Python/archive dependencies (--extra-py-files) versus plain data files (--extra-files).
    extra_py_files = []
    extra_files = []
    for blob in get_blob_list(codecommit, repositoryName, os.environ['branch']):
        path = trigger_config['bucket_prefix'] + "/" + blob['path']
        print("path::" + path)
        content = codecommit.get_blob(repositoryName=repositoryName, blobId=blob['blobId'])['content']
        if path.endswith(('.py', '.zip', '.tar')):
            extra_py_files.append("s3://" + trigger_config['bucket_name'] + "/" + path)
        else:
            extra_files.append("s3://" + trigger_config['bucket_name'] + "/" + path)
        s3.put_object(Bucket=trigger_config['bucket_name'], Body=content, Key=path)

    # Create (or recreate) the scheduled Glue trigger for the job.
    response = create_job_schedule(glue, trigger_config=trigger_config,
                                   extra_py_files=extra_py_files, extra_files=extra_files)

    # Optionally start the Glue job immediately on push.
    try:
        if trigger_config['start_job_on_push'].lower() == 'yes':
            print("Initiating start_glue_job")
            response = start_glue_job(glue, job_name='ETLDevSandbox_GlueJob',
                                      trigger_config=trigger_config,
                                      extra_py_files=extra_py_files,
                                      extra_files=extra_files)
    except Exception as error:
        print("Skipping immediate job start: %s" % error)

    return {
        'statusCode': 200,
        'body': json.dumps('Code Deployed Successfully.')
    }


def create_job_schedule(glue, trigger_config=None, extra_py_files=None, extra_files=None):
    # Fall back to defaults for any setting missing from config.json.
    trigger_name = trigger_config.get(
        'trigger_name',
        'myTrigger-' + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    trigger_type = trigger_config.get('trigger_type', 'SCHEDULED')
    job_name = trigger_config.get('job_name', 'ETLDevSandbox_GlueJob')
    schedule_exp = trigger_config.get('schedule_exp', 'cron(15 12 * * ? *)')

    print("Creating trigger with the below parameters: trigger_name: %s, trigger_type: %s, "
          "job_name: %s, schedule_exp: %s" % (trigger_name, trigger_type, job_name, schedule_exp))

    actions = [
        {
            'JobName': job_name,
            'Arguments': {
                '--extra-py-files': ','.join(extra_py_files),
                '--extra-files': ','.join(extra_files),
                'State': 'ACTIVATED'
            }
        }
    ]
    try:
        response = glue.create_trigger(
            Name=trigger_name,
            Type=trigger_type,
            Schedule=schedule_exp,
            Actions=actions
        )
    except glue.exceptions.AlreadyExistsException:
        # The trigger already exists from a previous push; delete and recreate it.
        glue.delete_trigger(Name=trigger_name)
        response = glue.create_trigger(
            Name=trigger_name,
            Type=trigger_type,
            Schedule=schedule_exp,
            Actions=actions
        )
    print("Glue job trigger created successfully.")
    glue.start_trigger(Name=trigger_name)
    return response


def get_trigger_config(codecommit, repositoryName):
    trigger_file = codecommit.get_file(repositoryName=repositoryName, filePath="/config/config.json")
    trigger_config = ast.literal_eval(trigger_file['fileContent'].decode("UTF-8"))
    print(trigger_config)
    return trigger_config


def start_glue_job(glue, job_name='ETLDevSandbox_GlueJob', trigger_config=None,
                   extra_py_files=None, extra_files=None):
    response = glue.start_job_run(
        JobName=job_name,
        Arguments={
            '--extra-py-files': ','.join(extra_py_files),
            '--extra-files': ','.join(extra_files)
        }
    )
    print("Starting glue job %s with parameters :: --extra-py-files : %s :: --extra-files : %s"
          % (job_name, extra_py_files, extra_files))
    return response


def get_repository_details(event, codecommit):
    # The repository name is the sixth field of the CodeCommit event source ARN.
    repositoryName = event['Records'][0]['eventSourceARN'].split(':')[5]
    branch = codecommit.get_repository(repositoryName=repositoryName)['repositoryMetadata']['defaultBranch']
    return repositoryName, branch


# Returns a list of all files in the branch.
def get_blob_list(codecommit, repository, branch):
    # Reference: https://medium.com/@michael.niedermayr/using-aws-codecommit-and-lambda-for-automatic-code-deployment-to-s3-bucket-b35aa83d029b
    response = codecommit.get_differences(
        repositoryName=repository,
        afterCommitSpecifier=branch,
    )
    blob_list = [difference['afterBlob'] for difference in response['differences']]
    while 'nextToken' in response:
        response = codecommit.get_differences(
            repositoryName=repository,
            afterCommitSpecifier=branch,
            nextToken=response['nextToken']
        )
        blob_list += [difference['afterBlob'] for difference in response['differences']]
    return blob_list
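

# ---------------------------------------------------------------------------
# Local invocation sketch (not part of the deployed Lambda; included only to
# illustrate the event shape the handler expects). The repository name and
# account ID below are hypothetical placeholders, and running this requires
# AWS credentials with access to the referenced CodeCommit, S3, and Glue
# resources.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    sample_event = {
        "Records": [
            {
                # CodeCommit trigger notifications carry the repository ARN here;
                # lambda_handler extracts the repository name from its sixth field.
                "eventSourceARN": "arn:aws:codecommit:us-east-1:123456789012:MyDemoRepo"
            }
        ]
    }
    print(lambda_handler(sample_event, None))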