# Template header. The format version is quoted so that generic YAML loaders
# keep it as a string instead of converting it to a date object.
AWSTemplateFormatVersion: "2010-09-09"
# SAM transform: required for the AWS::Serverless::Function resources below.
Transform: AWS::Serverless-2016-10-31
Description: A sample cloudformation template to deploy infrastructure for Dicom Metadata Parser

# Stack inputs. Defaults of String parameters are quoted so the YAML scalar type
# matches the declared parameter type; Number parameters carry range limits so
# out-of-range values are rejected at parameter validation time.
Parameters:
  # ---- S3 locations -------------------------------------------------------
  S3InputBucketName:
    Type: String
    Description: Name of S3 Bucket where DICOM file are located
  S3OutputBucketName:
    Type: String
    Description: Name of S3 Bucket DICOM metadata is placed

  # ---- Networking for AWS Batch (Fargate) ---------------------------------
  VpcBatch:
    Type: AWS::EC2::VPC::Id
    Description: VPC ID to launch compute resources into.
  SubnetsBatch:
    Type: List<AWS::EC2::Subnet::Id>
    Description: List of subnets to run AWS Batch Fargate tasks
  AssignPublicIp:
    Type: String
    AllowedValues:
      - DISABLED
      - ENABLED
    Default: ENABLED
    Description: Assigned Public IPs to containers, if placing in private subnet can be DISABLED.

  # ---- Sizing -------------------------------------------------------------
  # Batch resource requirements are passed as strings, hence Type: String.
  ContainerMemory:
    Type: String
    Description: MiB allocated to AWS Batch Container
    Default: "1024"
  ContainervCPU:
    Type: String
    Description: Number of vCPUS reserved for the job.
    Default: "0.5"
  LambdaMemory:
    Type: Number
    Description: MiB allocated to AWS Lambda Function
    Default: 256
    # Lambda accepts 128 MiB - 10240 MiB.
    MinValue: 128
    MaxValue: 10240
  LambdaDuration:
    Type: Number
    Description: Max duration of Lambda Runtime
    Default: 600
    # Lambda timeout is expressed in seconds and capped at 900 (15 minutes).
    MinValue: 1
    MaxValue: 900

  # ---- Glue catalog -------------------------------------------------------
  PartitionKey:
    Type: String
    AllowedValues:
      - study_date
    Default: study_date
    Description: Partition the S3 data by key
  GlueTableName:
    Type: String
    Description: Name of Dicom meta table
    Default: dicom_metadata

  # ---- Application settings -----------------------------------------------
  LogLevel:
    Type: String
    Description: Set Log Level of Application
    AllowedValues:
      - INFO
      - DEBUG
    Default: INFO
  VersionDescription:
    Type: String
    Description: Unique description to pass to Dicom parser lambda version
    Default: "1"
Resources:
  KMSKey:
    # Customer-managed KMS key for the stack. It is attached (through KeyAlias)
    # to the dead-letter queue, and the IAM policy KMSKeyPolicy grants the
    # Lambda and Batch roles use of it.
    Type: AWS::KMS::Key
    Properties:
      Description: KMS Key used by Dicom Parser
      PendingWindowInDays: 20
      EnableKeyRotation: true
      KeyPolicy:
        Id: Dicom Parser KMS Key
        Version: "2012-10-17"
        Statement:
          # Full key access for the account root principal.
          - Sid: Enable IAM User Permissions
            Effect: Allow
            Action: "kms:*"
            Resource: "*"
            Principal:
              AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root"
          # EventBridge delivers failed-job events to the encrypted queue, so it
          # needs to generate and decrypt data keys.
          - Sid: Allow EventBridge Service
            Effect: Allow
            Action:
              - kms:Decrypt
              - kms:GenerateDataKey
            Resource: "*"
            Principal:
              Service:
                - events.amazonaws.com
  KeyAlias:
    # Alias "alias/<stack name>" pointing at KMSKey. The Retain policies keep the
    # alias when the stack is deleted or the resource is replaced.
    Type: AWS::KMS::Alias
    UpdateReplacePolicy: Retain
    DeletionPolicy: Retain
    Properties:
      TargetKeyId: !Ref KMSKey
      AliasName: !Sub "alias/${AWS::StackName}"
  S3OutputBucket:
    # Bucket named by the S3OutputBucketName parameter; it is the location of
    # the Glue table and the write target granted to the execution roles.
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Ref S3OutputBucketName
      # All four public-access guards are switched on.
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        IgnorePublicAcls: true
        BlockPublicPolicy: true
        RestrictPublicBuckets: true
      # Default server-side encryption with S3-managed keys.
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - BucketKeyEnabled: true
            ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
  S3InputBucket:
    # Bucket named by the S3InputBucketName parameter; object-created events on
    # it trigger the DicomParser function.
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Ref S3InputBucketName
      # All four public-access guards are switched on.
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        IgnorePublicAcls: true
        BlockPublicPolicy: true
        RestrictPublicBuckets: true
      # Default server-side encryption with S3-managed keys.
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - BucketKeyEnabled: true
            ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
  DeadLetterQueue:
    # Queue that receives failed Lambda invocations (DicomParser.DeadLetterQueue)
    # and failed Batch job events (DicomEventRule target).
    Type: AWS::SQS::Queue
    Properties:
      # The queue is encrypted with the stack key via its alias, so every IAM
      # role that uses the queue must be allowed kms:Decrypt on that key.
      KmsMasterKeyId: !Ref KeyAlias
      QueueName: dicom-dead-letter-queue
   
  DeadLetterQueuePolicy:
    # Resource policy on the dead-letter queue: EventBridge may send messages,
    # but only on behalf of DicomEventRule.
    Type: AWS::SQS::QueuePolicy
    Properties:
      Queues:
        - !Ref DeadLetterQueue
      PolicyDocument:
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - events.amazonaws.com
            Action:
              - sqs:SendMessage
            Resource: !GetAtt DeadLetterQueue.Arn
            # Restrict the grant to events originating from this stack's rule.
            Condition:
              ArnEquals:
                aws:SourceArn: !GetAtt DicomEventRule.Arn
      
  BatchCompute:
    # Managed AWS Batch compute environment on Fargate Spot, capped at 2 vCPUs,
    # placed in the subnets given by SubnetsBatch.
    Type: AWS::Batch::ComputeEnvironment
    Properties:
      ComputeEnvironmentName: dicom-compute
      Type: Managed
      ComputeResources:
        Type: FARGATE_SPOT
        MaxvCpus: 2
        SecurityGroupIds:
          - !GetAtt SecurityGroup.GroupId
        Subnets: !Ref SubnetsBatch
  BatchQueue:
    # Job queue backed by the single BatchCompute environment.
    Type: AWS::Batch::JobQueue
    Properties:
      JobQueueName: dicom-queue
      Priority: 1
      ComputeEnvironmentOrder:
        - Order: 1
          ComputeEnvironment: !Ref BatchCompute
  SecurityGroup:
    # Security group for the Batch Fargate tasks. The only rule declared here is
    # outbound TCP 443 to any address; no ingress rules are defined.
    Type: AWS::EC2::SecurityGroup
    Properties:
      VpcId: !Ref VpcBatch
      GroupDescription: Dicom AWS Batch Fargate SecurityGroup
      SecurityGroupEgress:
        - CidrIp: 0.0.0.0/0
          IpProtocol: TCP
          FromPort: 443
          ToPort: 443
  DicomParser:
    # Container-image Lambda function triggered by object-created events on
    # S3InputBucket. A new version is published behind the `live` alias on each
    # deployment, and failed invocations go to DeadLetterQueue.
    Type: AWS::Serverless::Function
    # Image build settings.
    Metadata:
      DockerContext: ../
      Dockerfile: Dockerfile.lambda
      DockerTag: latest
    Properties:
      FunctionName: dicom-lambda-image
      PackageType: Image
      CodeUri: ../src/
      Role: !GetAtt LambdaRole.Arn
      MemorySize: !Ref LambdaMemory
      Timeout: !Ref LambdaDuration
      # Versioning / rollout: shift all traffic to the new version at once.
      AutoPublishAlias: live
      VersionDescription: !Ref VersionDescription
      DeploymentPreference:
        Type: AllAtOnce
        Enabled: true
      DeadLetterQueue:
        Type: SQS
        TargetArn: !GetAtt DeadLetterQueue.Arn
      Events:
        S3Event:
          Type: S3
          Properties:
            Bucket: !Ref S3InputBucket
            Events: "s3:ObjectCreated:*"
      # Configuration handed to the application through environment variables.
      Environment:
        Variables:
          LOGLEVEL: !Ref LogLevel
          S3_OUTPUT_BUCKET: !Ref S3OutputBucket
          S3_OUTPUT_BUCKET_REGION: !Ref AWS::Region
          GLUE_DATABASE_NAME: !Ref GlueDatabase
          GLUE_DATABASE_TABLE: !Ref GlueTableName
          AWS_BATCH_QUEUE: !Ref BatchQueue
  CustomGetImageResource:
    # Custom resource served by CustomGetImageFunction. Its `ImageUri` attribute
    # is consumed by BatchJobDefinition.
    Type: AWS::CloudFormation::CustomResource
    DependsOn: DicomParser
    Properties:
      ServiceToken: !GetAtt CustomGetImageFunction.Arn
      # The handler reads only LambdaName. The remaining properties are not read
      # by it; presumably they exist to order creation after the alias and
      # deployment group and to force an update when the version changes
      # (TODO confirm).
      LambdaName: dicom-lambda-image
      VersionDescription: !Ref VersionDescription
      LambdaAlias: !Ref DicomParserAliaslive
      CodeDeployGroup: !Ref DicomParserDeploymentGroup

  CustomGetImageFunction:
    # Handler for CustomGetImageResource. On Create/Update it looks up the
    # container image URI behind the `live` alias of the function named in the
    # LambdaName property and returns it as the `ImageUri` attribute. On Delete
    # it does nothing. Every request is answered exactly once.
    Type: AWS::Serverless::Function
    Properties:
      Handler: index.lambda_handler
      Description: "Retrieves Lambda Image Uri"
      PackageType: Zip
      Policies:
        - AWSLambdaExecute
        - Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - lambda:GetFunction
              # "<function arn>:*" so that qualified (alias/version) ARNs match.
              Resource: !Join [':', [!GetAtt DicomParser.Arn, '*']]
      Timeout: 30
      # The cfnresponse module is provided by CloudFormation for inline code;
      # it is not tied to a particular Python runtime.
      Runtime: python3.12
      InlineCode: |
        import logging
        import cfnresponse
        import boto3

        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

        def lambda_handler(event, context):
          """Handle a CloudFormation custom resource request.

          Sends exactly one response per invocation: SUCCESS (with ImageUri on
          Create/Update) or FAILED with the error message.
          """
          logger.info(f'Event: {event}')
          try:
            responseData = {}
            request_type = event['RequestType']
            if request_type == 'Delete':
              # Nothing was created, so there is nothing to clean up.
              logger.info('Delete operation - Skip all actions')
            elif request_type in ["Create", "Update"]:
              LambdaName = event['ResourceProperties']['LambdaName']
              client = boto3.client('lambda')
              response = client.get_function(FunctionName=LambdaName, Qualifier='live')
              responseData['ImageUri'] = response.get('Code').get('ImageUri')
              logger.info('Retrieved ImageUri!')
            else:
              logger.info(f"Not Valid Request Type {request_type}")
            cfnresponse.send(event, context, cfnresponse.SUCCESS, responseData)
          except Exception as err:
            logger.error(err)
            responseData = {"Msg": str(err)}
            cfnresponse.send(event, context, cfnresponse.FAILED, responseData)
          return
  BatchJobDefinition:
    # Fargate job definition "dicom-parser". It runs the image whose URI is
    # returned by CustomGetImageResource (the image behind the DicomParser
    # function's `live` alias), with `app.py` as the container command.
    Type: AWS::Batch::JobDefinition
    Properties:
      JobDefinitionName: dicom-parser
      Type: container
      PlatformCapabilities:
        - FARGATE
      PropagateTags: true
      ContainerProperties:
        Image: !GetAtt CustomGetImageResource.ImageUri
        Command:
          - app.py
        User: dicom
        # The same role serves as both the task execution role and the job role.
        ExecutionRoleArn: !GetAtt ExecutionRole.Arn
        JobRoleArn: !GetAtt ExecutionRole.Arn
        ResourceRequirements:
          - Type: VCPU
            Value: !Ref ContainervCPU
          - Type: MEMORY
            Value: !Ref ContainerMemory
        NetworkConfiguration:
          AssignPublicIp: !Ref AssignPublicIp
        # Container logs go to the stack's log group with the "job" prefix.
        LogConfiguration:
          LogDriver: awslogs
          Options:
            awslogs-group: !Ref LogGroup
            awslogs-stream-prefix: job
        # Same settings the Lambda function receives, minus the Batch queue.
        Environment:
          - Name: S3_OUTPUT_BUCKET
            Value: !Ref S3OutputBucket
          - Name: S3_OUTPUT_BUCKET_REGION
            Value: !Ref AWS::Region
          - Name: LOGLEVEL
            Value: !Ref LogLevel
          - Name: GLUE_DATABASE_NAME
            Value: !Ref GlueDatabase
          - Name: GLUE_DATABASE_TABLE
            Value: !Ref GlueTableName

  ExecutionRole:
    # Role assumed by ECS tasks; used by BatchJobDefinition as both execution
    # role and job role. Additional permissions are attached by
    # ExecutionRolePolicy and KMSKeyPolicy.
    Type: AWS::IAM::Role
    Properties:
      RoleName: dicom-batch-ExecutionRole
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - sts:AssumeRole
            Principal:
              Service:
                - ecs-tasks.amazonaws.com
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
  PermissionForS3ToInvokeLambda:
    # Lets S3 invoke DicomParser, limited to the input bucket in this account.
    Type: AWS::Lambda::Permission
    Properties:
      Action: lambda:InvokeFunction
      FunctionName: !Ref DicomParser
      Principal: s3.amazonaws.com
      SourceArn: !GetAtt S3InputBucket.Arn
      SourceAccount: !Ref AWS::AccountId
  LambdaRole:
    # Execution role of the DicomParser function. Further permissions are
    # attached by ExecutionRolePolicy, KMSKeyPolicy and LambdaAWSBatchRolePolicy.
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - sts:AssumeRole
            Principal:
              Service:
                - lambda.amazonaws.com
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
        - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
      Policies:
        # Needed for the function's SQS dead-letter queue.
        - PolicyName: SendToSQS
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - sqs:SendMessage
                Resource:
                  - !GetAtt DeadLetterQueue.Arn
  ExecutionRolePolicy:
    # Data-access policy shared by the Batch execution role and the Lambda role:
    # read the input bucket, write the output bucket, write to the log group and
    # maintain the Glue table.
    Type: AWS::IAM::Policy
    Properties:
      PolicyName: dicom-ExecutionRoleS3Policy
      Roles:
        - !Ref ExecutionRole
        - !Ref LambdaRole
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          # Read source objects. The ARN is built from the bucket-name
          # parameter rather than from the S3InputBucket resource.
          - Effect: Allow
            Action:
              - s3:GetObject
            Resource:
              - !Sub "arn:aws:s3:::${S3InputBucketName}/*"
          # Write, tag and delete objects in the output bucket.
          - Effect: Allow
            Action:
              - s3:PutObject
              - s3:DeleteObject
              - s3:PutObjectTagging
            Resource:
              - !Sub "${S3OutputBucket.Arn}/*"
          # Log stream access on the stack's log group.
          - Effect: Allow
            Action:
              - logs:CreateLogStream
              - logs:DescribeLogStreams
              - logs:PutLogEvents
              - logs:GetLogEvents
            Resource:
              - !GetAtt LogGroup.Arn
          # Glue catalog, database and table used for the DICOM metadata.
          - Effect: Allow
            Action:
              - glue:GetTable
              - glue:UpdateTable
              - glue:BatchCreatePartition
            Resource:
              - !Sub "arn:aws:glue:${AWS::Region}:${AWS::AccountId}:catalog"
              - !Sub "arn:aws:glue:${AWS::Region}:${AWS::AccountId}:database/${GlueDatabase}"
              - !Sub "arn:aws:glue:${AWS::Region}:${AWS::AccountId}:table/${GlueDatabase}/${GlueTable}"
  KMSKeyPolicy:
    # Grants the Batch execution role and the Lambda role use of the stack's
    # KMS key (decrypt and data-key generation only).
    Type: AWS::IAM::Policy
    Properties:
      PolicyName: dicom-KMSKey-Policy
      Roles:
        - !Ref ExecutionRole
        - !Ref LambdaRole
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - kms:Decrypt
              - kms:GenerateDataKey
            Resource:
              - !GetAtt KMSKey.Arn
  LogGroup:
    # CloudWatch log group used by the Batch job's awslogs driver; log events
    # are kept for 7 days.
    Type: AWS::Logs::LogGroup
    Properties:
      RetentionInDays: 7
      LogGroupName: dicom-parser
  LambdaAWSBatchRolePolicy:
    # Allows the Lambda role to submit jobs to the stack's Batch queue using the
    # "dicom-parser" job definition.
    Type: AWS::IAM::Policy
    Properties:
      # Declared exactly once: duplicate mapping keys are invalid YAML, and most
      # parsers silently keep only the last value.
      PolicyName: dicom-lambda-execution-aws-batch-policy
      Roles:
        - !Ref LambdaRole
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - batch:SubmitJob
            Resource:
              # Ref on a Batch job queue yields its ARN.
              - !Ref BatchQueue
              - !Sub arn:aws:batch:${AWS::Region}:${AWS::AccountId}:job-definition/dicom-parser

  DicomEventRule:
    # EventBridge rule on the default bus: forwards "Batch Job State Change"
    # events with status FAILED for the stack's job queue to the dead-letter
    # queue.
    Type: AWS::Events::Rule
    Properties:
      Name: dicom-batch-event-rule
      EventBusName: default
      State: ENABLED
      # RoleArn: !GetAtt EventRuleRole.Arn
      EventPattern:
        source:
          - aws.batch
        detail-type:
          - Batch Job State Change
        detail:
          jobQueue:
            - !Ref BatchQueue
          status:
            - FAILED
      Targets:
        - Id: DeadLetterQueue
          Arn: !GetAtt DeadLetterQueue.Arn

  GlueDatabase:
    # Glue Data Catalog database "dicom_db" in this account's catalog; holds
    # the DICOM metadata table.
    Type: AWS::Glue::Database
    Properties:
      CatalogId: !Ref AWS::AccountId
      DatabaseInput:
        Description: Dicom Metadata Source
        Name: dicom_db

  GlueCrawler:
    # Crawler that targets the existing catalog table (GlueTableName in
    # GlueDatabase) rather than an S3 path.
    Type: AWS::Glue::Crawler
    Properties:
      Name: dicom-crawler
      Role: !GetAtt GlueCrawlerRole.Arn
      Targets:
        CatalogTargets:
          - DatabaseName: !Ref GlueDatabase
            Tables:
              - !Ref GlueTableName
      # Schema changes are written back to the catalog; deletions are only logged.
      SchemaChangePolicy:
        DeleteBehavior: LOG
        UpdateBehavior: UPDATE_IN_DATABASE
      # Crawler configuration document, passed as a JSON string.
      Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"}},"Grouping":{"TableGroupingPolicy":"CombineCompatibleSchemas"}}'
  GlueCrawlerRole:
    # Role assumed by the Glue service for GlueCrawler: the AWS-managed Glue
    # service policy plus object access to the output bucket.
    Type: AWS::IAM::Role
    Properties:
      Path: /
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - sts:AssumeRole
            Principal:
              Service:
                - glue.amazonaws.com
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole
      Policies:
        - PolicyName: DicomGlueCrawler
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                  - s3:PutObject
                  - s3:PutObjectTagging
                Resource:
                  - !Sub "${S3OutputBucket.Arn}/*"

  GlueTable:
    # External Parquet table over the root of the output bucket, partitioned by
    # the PartitionKey parameter (typed as date).
    Type: AWS::Glue::Table
    Properties:
      CatalogId: !Ref AWS::AccountId
      DatabaseName: !Ref GlueDatabase
      TableInput:
        Name: !Ref GlueTableName
        TableType: EXTERNAL_TABLE
        # Table properties; all values are strings.
        Parameters:
          projection.enabled: "false"
          compressionType: snappy
          typeOfData: file
          classification: parquet
        PartitionKeys:
          - Name: !Ref PartitionKey
            Type: date
        StorageDescriptor:
          Location: !Sub "s3://${S3OutputBucket}/"
          Compressed: true
          InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
          OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
          SerdeInfo:
            SerializationLibrary: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
            Parameters:
              serialization.format: "1"
          # Column order is part of the table schema; keep it stable.
          Columns:
            # DICOM study / series attributes
            - Name: modality
              Type: string
            - Name: sopclass_uid
              Type: string
            - Name: study_time
              Type: string
            - Name: study_id
              Type: string
            - Name: series_number
              Type: string
            # Patient attributes
            - Name: patient_sex
              Type: string
            - Name: patient_birth_date
              Type: date
            - Name: patient_age
              Type: string
            - Name: patient_orientation
              Type: "array<string>"
            - Name: patient_weight
              Type: string
            - Name: image_comments
              Type: string
            # Source object location
            - Name: source_s3_bucket
              Type: string
            - Name: source_s3_region
              Type: string
            - Name: source_s3_key
              Type: string
            - Name: source_s3_archive_path
              Type: string