"""Export an S3 object listing to CSV files, one CSV per listing page.

Configuration is read from the environment variables AWS_REGION,
INPUT_BUCKET, INPUT_BUCKET_PREFIX and OUTPUT_BUCKET.
"""

import json
import os
import pickle
import sys
from io import StringIO

import boto3
import pandas as pd

from malware_detection_utils.utils import logMessage

LOGTYPE_ERROR = 'ERROR'
LOGTYPE_INFO = 'INFO'
LOGTYPE_DEBUG = 'DEBUG'


def _listing_page_to_csv(contents):
    """Render one page of listing entries as CSV text.

    Args:
        contents: The 'Contents' list taken from a list_objects_v2 response.

    Returns:
        The CSV text produced by pandas for that page.
    """
    # The JSON round-trip (default=str, sort_keys=True) is kept on purpose:
    # it stringifies non-JSON values and fixes the column order, so the CSV
    # layout stays the same as before.
    json_data = json.dumps(contents, indent=4, sort_keys=True, default=str)
    csv_buffer = StringIO()
    # read_json is given a file-like object because passing a literal JSON
    # string is deprecated in recent pandas releases.
    pd.read_json(StringIO(json_data)).to_csv(csv_buffer)
    return csv_buffer.getvalue()


def main():
    """List the objects under the input prefix and upload the listing as CSV.

    Every page returned by list_objects_v2 is written to OUTPUT_BUCKET as
    'sorel-20m-object-list-<page index>.csv'. Pages without any 'Contents'
    entries are skipped.

    Raises:
        Exception: whatever the first list_objects_v2 call raises (for
            example on missing configuration); that call is intentionally
            left outside the error handler. Errors raised while writing or
            fetching further pages are logged and end the export.
    """
    aws_region = os.environ.get('AWS_REGION')
    input_bucket = os.environ.get('INPUT_BUCKET')
    input_bucket_prefix = os.environ.get('INPUT_BUCKET_PREFIX')
    output_bucket = os.environ.get('OUTPUT_BUCKET')

    logMessage(f"AWS_REGION {aws_region}", LOGTYPE_INFO)
    logMessage(f"INPUT_BUCKET {input_bucket}", LOGTYPE_INFO)
    logMessage(f"INPUT_BUCKET_PREFIX {input_bucket_prefix}", LOGTYPE_INFO)
    logMessage(f"OUTPUT_BUCKET {output_bucket}", LOGTYPE_INFO)

    list_kwargs = {
        'Bucket': input_bucket,
        'Prefix': input_bucket_prefix,
        'Delimiter': '/',
    }

    client = boto3.client('s3', region_name=aws_region)
    response = client.list_objects_v2(**list_kwargs)
    # Use the same region as the client instead of the ambient default.
    s3 = boto3.resource('s3', region_name=aws_region)

    try:
        page_index = 0
        while True:
            # 'Contents' can be missing when the page holds no objects, so
            # do not index it directly.
            contents = response.get('Contents')
            if contents:
                s3.Object(
                    output_bucket,
                    f'sorel-20m-object-list-{page_index}.csv',
                ).put(Body=_listing_page_to_csv(contents))
            else:
                logMessage(
                    f"No objects in listing page {page_index}; nothing written",
                    LOGTYPE_INFO,
                )

            # Check for a continuation token only AFTER the current page has
            # been written, so the final (or only) page is not dropped.
            token = response.get('NextContinuationToken')
            if not token:
                break
            response = client.list_objects_v2(
                **list_kwargs, ContinuationToken=token
            )
            page_index += 1
    except Exception as ex:
        # Best-effort export: report the failure at error level and stop.
        logMessage(f"Error occurred: {ex}", LOGTYPE_ERROR)


# NOTE(review): kept at module level, so it is also printed on import; the
# original one-line layout does not show whether this belonged inside
# main() - confirm the intended placement.
print("Python file invoked")

if __name__ == '__main__':
    main()