##################
# This pattern creates an S3 file trigger that invokes a Lambda function,
# which in turn triggers a Glue Workflow to process the file.
##################

data "aws_caller_identity" "current" {}

data "aws_region" "current" {}

# Create a new S3 bucket to store Glue jobs, Glue Job Logs, etc.
# The account ID is appended to the bucket name.
resource "aws_s3_bucket" "landing_bucket" {
  bucket = "s3-landing-${data.aws_caller_identity.current.account_id}"
}

# Archive source code for Lambda (single-file archive of src/app.py).
# NOTE(review): nothing in the visible part of this file references this
# archive; the function below is deployed from "lambda_zip_dir". Confirm it is
# still needed before removing it.
data "archive_file" "lambda_zip_file" {
  type        = "zip"
  source_file = "${path.module}/src/app.py"
  output_path = "${path.module}/lambda.zip"
}

##################
# Adding S3 bucket as trigger to my lambda and giving the permissions
##################

# NOTE(review): with the filters below commented out, every object created in
# the landing bucket invokes the Lambda, including the Lambda zip uploaded
# under "lambda_scripts/" by this same configuration. Consider enabling
# filter_prefix.
resource "aws_s3_bucket_notification" "aws-lambda-trigger" {
  bucket = aws_s3_bucket.landing_bucket.id

  lambda_function {
    lambda_function_arn = aws_lambda_function.first_glue_job_trigger_glue_workflow.arn
    events              = ["s3:ObjectCreated:*"]
    # filter_prefix = "file_landing/"
    # filter_suffix = "file-extension"
  }

  # S3 validates that it may invoke the destination when the notification is
  # written, so the invoke permission must exist first.
  depends_on = [aws_lambda_permission.file_lambda_permission]
}

# Provide permission to invoke lambda function when file is uploaded to S3 bucket
resource "aws_lambda_permission" "file_lambda_permission" {
  statement_id  = "AllowS3Invoke"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.first_glue_job_trigger_glue_workflow.function_name
  principal     = "s3.amazonaws.com"

  # Use the bucket's own ARN instead of hand-building one with a hard-coded
  # partition, and pin the owning account so a same-named bucket in another
  # account cannot invoke the function.
  source_arn     = aws_s3_bucket.landing_bucket.arn
  source_account = data.aws_caller_identity.current.account_id
}

# Random suffix used in the output path of the Lambda zip archive below
resource "random_string" "randomstring_fileingestion" {
  length  = 4
  special = false
}

# Archive/Zip the source code for Lambda
data "archive_file" "lambda_zip_dir" {
  type        = "zip"
  output_path = "/tmp/file_trigger_lambda-${random_string.randomstring_fileingestion.result}.zip"

  # Anchor to the module directory (as lambda_zip_file does) so the archive
  # does not depend on the directory Terraform is run from.
  source_dir = "${path.module}/src"
}

# Upload the Source code into S3 Bucket
resource "aws_s3_object" "lambda_code_upload" {
  key    = "lambda_scripts/first_glue_job_trigger_glue_workflow.zip"
  bucket = aws_s3_bucket.landing_bucket.bucket
  source = data.archive_file.lambda_zip_dir.output_path

  # The S3 ETag of a single-part upload is the hex MD5 of the content; a
  # base64 SHA-256 never matches it and forces a re-upload on every plan.
  etag = data.archive_file.lambda_zip_dir.output_md5
}

# To Deploy Lambda Layers - Has been commented out for this version
# resource "aws_lambda_layer_version" "lambda_layer_wrangler" {
#   filename            = "${path.module}/../../../../build_tools/awswrangler-layer-2.17.0-py3.9.zip"
#   layer_name          = "aws_wrangler"
#   compatible_runtimes = ["python3.9"]
# }

# Create Lambda Function
resource "aws_lambda_function" "first_glue_job_trigger_glue_workflow" {
  function_name = "first_glue_job_trigger_glue_workflow"
  s3_bucket     = aws_s3_bucket.landing_bucket.bucket

  # Reference the uploaded object (same key as before) so Terraform uploads
  # the zip before it creates or updates the function.
  s3_key           = aws_s3_object.lambda_code_upload.key
  source_code_hash = data.archive_file.lambda_zip_dir.output_base64sha256 # Archived source code hash
  runtime          = "python3.9"                                          # Must be a runtime currently supported by AWS Lambda

  # This is the function that will be executed when the lambda is triggered.
  # NOTE(review): the handler module is "first_glue_job_trigger_glue_workflow",
  # while the only source file named in this configuration is src/app.py.
  # Confirm src/ contains first_glue_job_trigger_glue_workflow.py.
  handler     = "first_glue_job_trigger_glue_workflow.lambda_handler"
  timeout     = 900 # 15 minutes is the maximum time allowed for the lambda function to run
  role        = aws_iam_role.terraform_lambda_function_role.arn
  memory_size = 512

  # vpc_config {
  #   subnet_ids         = var.subnet_id
  #   security_group_ids = var.lambda_sg
  # }
  # layers = [aws_lambda_layer_version.lambda_layer_wrangler.arn]

  ephemeral_storage {
    size = 2048 # Min 512 MB and the Max 10240 MB
  }
}

# CloudWatch Log group to store Lambda logs
resource "aws_cloudwatch_log_group" "lambda_demo_loggroup" {
  name              = "/aws/lambda/${aws_lambda_function.first_glue_job_trigger_glue_workflow.function_name}"
  retention_in_days = 365
}

##################
# Creating IAM Role for Lambda
##################

# IAM Policy Document for Lambda - Allows Trust Policy for Lambda
data "aws_iam_policy_document" "AWSLambdaTrustPolicy" {
  statement {
    actions = ["sts:AssumeRole"]
    effect  = "Allow"

    principals {
      type        = "Service"
      identifiers = ["lambda.amazonaws.com"]
    }
  }
}

# IAM Policy Document for Lambda - Allows Writing to Logs and Read/Write from
# File that Triggers the Lambda
# NOTE(review): the header of this file says the Lambda starts a Glue workflow,
# but no statement here grants glue:StartWorkflowRun. Confirm it is granted
# elsewhere.
data "aws_iam_policy_document" "AWSLambdaPolicy" {
  statement {
    effect = "Allow"
    actions = [
      "s3:Abort*",
      "s3:DeleteObject*",
      "s3:GetBucket*",
      "s3:GetObject*",
      "s3:List*",
      "s3:PutObject",
      "s3:PutObjectLegalHold",
      "s3:PutObjectRetention",
      "s3:PutObjectTagging",
      "s3:PutObjectVersionTagging"
    ]
    resources = [
      aws_s3_bucket.landing_bucket.arn,
      "${aws_s3_bucket.landing_bucket.arn}/*"
    ]
  }

  statement {
    effect = "Allow"
    actions = [
      "logs:CreateLogGroup",
      "logs:CreateLogStream",
      "logs:PutLogEvents"
    ]

    # logs:PutLogEvents is evaluated against log-stream ARNs
    # ("<log-group-arn>:log-stream:<name>"), which the bare log-group ARN does
    # not match, so the ":*" form is granted as well.
    resources = [
      aws_cloudwatch_log_group.lambda_demo_loggroup.arn,
      "${aws_cloudwatch_log_group.lambda_demo_loggroup.arn}:*"
    ]
  }
}

# Create IAM Role for Lambda
resource "aws_iam_role" "terraform_lambda_function_role" {
  name               = "terraform_lambda_function_role"
  assume_role_policy = data.aws_iam_policy_document.AWSLambdaTrustPolicy.json # Trust policy: lets the Lambda service assume this role
  # managed_policy_arns = [aws_iam_policy.lambda_policy.arn]
}

# Create IAM Policy for Lambda
resource "aws_iam_policy" "lambda_policy" {
  name        = "lambda_policy"
  description = "Policy to access S3"
  policy      = data.aws_iam_policy_document.AWSLambdaPolicy.json # Permissions policy (S3 and Logs), not the trust policy
}

# Attach Policy to Lambda IAM Role
resource "aws_iam_role_policy_attachment" "terraform_lambda_policy" {
  role       = aws_iam_role.terraform_lambda_function_role.name
  policy_arn = aws_iam_policy.lambda_policy.arn # Attaching the policy with access to Logs and S3 to the role
}

################
# Glue Workflow is created and the triggers are added to invoke the Glue jobs and Glue Crawlers
################

# Create Glue Workflow
resource "aws_glue_workflow" "sample_glue_workflow" {
  name = "sample_glue_workflow"
}

# Create a Glue trigger to invoke the first Glue job and add the trigger to the Glue Workflow
resource "aws_glue_trigger" "sample_glue_workflow-start" {
  name          = "sample_glue_workflow-start"
  type          = "ON_DEMAND"
  workflow_name = aws_glue_workflow.sample_glue_workflow.name

  actions {
    job_name = "first_glue_job"
    arguments = {
      "--run_type"      = "NORMAL"
      "--process_name"  = "sample_glue_workflow"
      "--config_path"   = "first_glue_job_conf_DEV.json"
      "--database_name" = "test_db"
    }
  }
}

# Create a Glue trigger to invoke the first Glue Crawler and add the trigger to the Glue Workflow
resource "aws_glue_trigger" "sample_glue_workflow-first_crawler" {
  name          = "sample_glue_workflow-start_first_crawler"
  type          = "CONDITIONAL"
  workflow_name = aws_glue_workflow.sample_glue_workflow.name # Workflow name created above

  # Fires once first_glue_job reports SUCCEEDED
  predicate {
    conditions {
      job_name         = "first_glue_job"
      state            = "SUCCEEDED"
      logical_operator = "EQUALS"
    }
  }

  actions {
    crawler_name = "first_crawler"
  }
}

# Create a Glue trigger to invoke the second Glue Job and add the trigger to the Glue Workflow
resource "aws_glue_trigger" "sample_glue_workflow-second_glue_job" {
  name          = "sample_glue_workflow-start_second_glue_job"
  type          = "CONDITIONAL"
  workflow_name = aws_glue_workflow.sample_glue_workflow.name # Workflow name created above

  # Fires once first_crawler reports SUCCEEDED
  predicate {
    conditions {
      crawler_name     = "first_crawler"
      crawl_state      = "SUCCEEDED"
      logical_operator = "EQUALS"
    }
  }

  actions {
    job_name = "second_glue_job"
    arguments = {
      "--run_type"      = "NORMAL"
      "--process_name"  = "sample_glue_workflow"
      "--config_path"   = "second_glue_job_conf_DEV.json"
      "--database_name" = "test_db"
    }
  }
}

# Create a Glue trigger to invoke the second Glue Crawler and add the trigger to the Glue Workflow
resource "aws_glue_trigger" "sample_glue_workflow-second_crawler" {
  name          = "sample_glue_workflow-start_second_crawler"
  type          = "CONDITIONAL"
  workflow_name = aws_glue_workflow.sample_glue_workflow.name

  # Fires once second_glue_job reports SUCCEEDED
  predicate {
    conditions {
      job_name         = "second_glue_job"
      state            = "SUCCEEDED"
      logical_operator = "EQUALS"
    }
  }

  actions {
    crawler_name = "second_crawler"
  }
}

# IAM policies to allow access to the S3 bucket
data "aws_iam_policy_document" "policy_document" {
  # Bucket-level access: listing applies to the bucket ARN itself
  statement {
    effect = "Allow"
    actions = [
      "s3:ListBucket"
    ]
    resources = [
      aws_s3_bucket.landing_bucket.arn
    ]
  }

  # Object-level access: read/write applies to keys under the bucket
  statement {
    effect = "Allow"
    actions = [
      "s3:GetObject",
      "s3:PutObject"
    ]
    resources = [
      "${aws_s3_bucket.landing_bucket.arn}/*"
    ]
  }
}

resource "aws_iam_policy" "s3_access_iam_policy" {
  name   = "sample-glue-s3-access-policy"
  policy = data.aws_iam_policy_document.policy_document.json
}

# Glue IAM roles and Policies
resource "aws_iam_role" "sample_glue_role" { name = "sample-glue-role" assume_role_policy = <