description: Export EC2 /var/log/secure to CloudTrail Lake
schemaVersion: '0.3'
assumeRole: '{{AutomationAssumeRole}}'
parameters:
  AutomationAssumeRole:
    type: String
    description: '(Required) The Amazon Resource Name (ARN) of the IAM role that allows Automation to perform the actions on your behalf. If no role is specified, Systems Manager Automation uses your IAM permissions to operate this runbook.'
  channelArn:
    type: String
    description: The ARN of the CloudTrail Lake channel to use.
  temps3bucket:
    type: 'AWS::S3::Bucket::Name'
    description: Temporary S3 bucket used to stage the logs before they are exported to CloudTrail Lake.
  InstanceIds:
    type: StringList
    description: (Required) The EC2 instance(s) whose /var/log/secure log is exported.
mainSteps:
  - name: copyLogsToS3
    action: 'aws:runCommand'
    inputs:
      DocumentName: AWS-RunShellScript
      InstanceIds: '{{ InstanceIds }}'
      Parameters:
        commands:
          - 'echo -e "/var/log/secure\n{\ncopytruncate\n}" | tee /etc/logrotate.d/seclog > /dev/null'
          - 'TOKEN=$(curl -sS -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 300")'
          - 'instanceid=$(curl -sS -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-id)'
          - 'aws s3 cp /var/log/secure s3://{{temps3bucket}}/$instanceid/secure >> /tmp/s3copy.out 2>&1'
          - /usr/sbin/logrotate -vf /etc/logrotate.d/seclog >> /tmp/logrotate.out 2>&1
    description: Copy logs to S3
  - name: exportLogsToCloudTrailLake
    action: 'aws:executeScript'
    timeoutSeconds: 120
    onFailure: Abort
    inputs:
      Runtime: python3.8
      Handler: publishEventsToCloudTrailLake
      InputPayload:
        channelArn: '{{channelArn}}'
        temps3bucket: '{{temps3bucket}}'
        InstanceIds: '{{ InstanceIds }}'
      Script: |-
        def download_dir(prefix, local, bucket, client):
            """
            params:
            - prefix: pattern to match in s3
            - local: local path to folder in which to place files
            - bucket: s3 bucket with target contents
            - client: initialized s3 client object
            """
            import os
            keys = []
            dirs = []
            next_token = ''
            base_kwargs = {
                'Bucket': bucket,
                'Prefix': prefix,
            }
            while next_token is not None:
                kwargs = base_kwargs.copy()
                if next_token != '':
                    kwargs.update({'ContinuationToken': next_token})
                results = client.list_objects_v2(**kwargs)
                # 'Contents' is absent when the prefix matches no objects
                for i in results.get('Contents') or []:
                    k = i.get('Key')
                    if k[-1] != '/':
                        keys.append(k)
                    else:
                        dirs.append(k)
                next_token = results.get('NextContinuationToken')
            for d in dirs:
                dest_pathname = os.path.join(local, d)
                if not os.path.exists(os.path.dirname(dest_pathname)):
                    os.makedirs(os.path.dirname(dest_pathname))
            for k in keys:
                dest_pathname = os.path.join(local, k)
                if not os.path.exists(os.path.dirname(dest_pathname)):
                    os.makedirs(os.path.dirname(dest_pathname))
                client.download_file(bucket, k, dest_pathname)

        def publishEventsToCloudTrailLake(events, context):
            import boto3
            import json
            import os
            import tempfile
            import uuid
            from datetime import datetime

            # Initialize the CloudTrail Lake ingestion and S3 clients
            cloudtrail = boto3.client('cloudtrail-data')
            s3 = boto3.client('s3')

            # Local scratch directory for the downloaded log files
            tmpdir = tempfile.mkdtemp()
            file_path = os.path.join(tmpdir, 'varlogsecure')

            # Runbook parameters passed in through InputPayload
            bucket_name = events['temps3bucket']
            channelArnArg = events['channelArn']
            InstanceIds = events['InstanceIds']
            # The account ID is the fifth colon-separated field of the channel ARN
            aws_account_id = channelArnArg.split(':')[4]
            auditEventsData = []

            # Download each instance's secure log from the S3 bucket to the local directory
            for InstanceId in InstanceIds:
                download_dir(InstanceId, file_path, bucket_name, s3)

            # Iterate through the downloaded files
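            # Layout note: download_dir() mirrors the S3 key structure, so each
            # log lands at <file_path>/<instance-id>/secure; the walk below
            # recovers the instance ID from the parent directory name.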
            for root, dirs, files in os.walk(file_path):
                for name in files:
                    log_path = os.path.join(root, name)
                    # The directory that contains the file is named after the instance ID
                    instanceid = os.path.basename(root)
                    with open(log_path, 'r') as f:
                        for line in f:
                            # Split the line into whitespace-separated fields
                            parts = line.split()
                            # Skip blank or truncated lines
                            if len(parts) < 5:
                                continue
                            # /var/log/secure timestamps carry no year, so assume the
                            # current one, then convert to the ISO 8601 format
                            eventTimeStr = parts[0] + ' ' + parts[1] + ' ' + str(datetime.today().year) + ' ' + parts[2]
                            eventTimeDt = datetime.strptime(eventTimeStr, '%b %d %Y %H:%M:%S')
                            eventTimeArg = eventTimeDt.strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
                            hostname = parts[3]
                            procname = parts[4]
                            # Process name without the "[pid]:" suffix, e.g. "sshd[123]:" -> "sshd"
                            eventName = procname.split('[')[0].rstrip(':')
                            # The message is everything after the timestamp and hostname
                            message = ' '.join(parts[4:])
                            eventData = {
                                "version": "0.1",
                                "userIdentity": {
                                    "type": procname,
                                    "principalId": hostname,
                                    "details": {
                                        "message": message
                                    }
                                },
                                "eventSource": instanceid,
                                "eventName": eventName,
                                "eventTime": eventTimeArg,
                                "recipientAccountId": aws_account_id,
                                "additionalEventData": {
                                    "message": line
                                }
                            }
                            auditEventsData.append({
                                "id": str(uuid.uuid4()),
                                "eventData": json.dumps(eventData)
                            })

            # PutAuditEvents accepts at most 100 events per request, so send in batches
            response = {}
            for i in range(0, len(auditEventsData), 100):
                response = cloudtrail.put_audit_events(
                    auditEvents=auditEventsData[i:i + 100],
                    channelArn=channelArnArg)
            return response
    outputs:
      - Name: response
        Selector: $.Payload
        Type: StringMap
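# A minimal invocation sketch, assuming the runbook has been created under the
# hypothetical name ExportSecureLogToCloudTrailLake; the role, channel ARN,
# bucket, and instance ID below are placeholders:
#
#   aws ssm start-automation-execution \
#     --document-name ExportSecureLogToCloudTrailLake \
#     --parameters 'AutomationAssumeRole=arn:aws:iam::111122223333:role/AutomationRole,channelArn=arn:aws:cloudtrail:us-east-1:111122223333:channel/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee,temps3bucket=amzn-s3-demo-bucket,InstanceIds=i-1234567890abcdef0'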