"""Export the ``meta`` table of a SQLite database stored in S3 as CSV.

The script downloads a SQLite file from one S3 bucket, reads every row of
its ``meta`` table, and uploads the rows as CSV text to a second bucket.
All settings come from environment variables (see ``main``).
"""
import os
import sqlite3
from contextlib import closing
from io import StringIO

import boto3
import pandas as pd

from malware_detection_utils.utils import logMessage

LOGTYPE_ERROR = 'ERROR'
LOGTYPE_INFO = 'INFO'
LOGTYPE_DEBUG = 'DEBUG'


def _read_meta_table(db_path):
    """Return all rows of the ``meta`` table in *db_path* as a DataFrame.

    The connection is closed even when the query raises.  Rows are passed
    to ``pd.DataFrame`` without column names, so the frame's columns are
    positional integers rather than the table's column names.
    """
    with closing(sqlite3.connect(db_path)) as con:
        rows = con.execute("SELECT * FROM meta").fetchall()
    return pd.DataFrame(rows)


def main():
    """Download the metadata DB from S3 and re-upload its ``meta`` table as CSV.

    Environment variables read:
        AWS_REGION: only logged here; not passed to boto3 by this function.
        BUCKET_NAME: source bucket (default ``sorel-20m``).
        OBJECT_NAME: key of the SQLite file in the source bucket
            (default ``09-DEC-2020/processed-data/meta.db``).
        FILE_NAME: local path the object is downloaded to
            (default ``meta.db``).
        METADATADB_BUCKET: destination bucket for the CSV (required).

    Any exception is caught and logged rather than propagated, so the
    function always returns normally.
    """
    aws_region = os.environ.get('AWS_REGION')
    bucket_name = os.environ.get('BUCKET_NAME', 'sorel-20m')
    object_name = os.environ.get(
        'OBJECT_NAME', '09-DEC-2020/processed-data/meta.db')
    file_name = os.environ.get('FILE_NAME', 'meta.db')
    metadatadb_bucket = os.environ.get('METADATADB_BUCKET')

    settings = (
        ('AWS_REGION', aws_region),
        ('BUCKET_NAME', bucket_name),
        ('OBJECT_NAME', object_name),
        ('FILE_NAME', file_name),
        ('METADATADB_BUCKET', metadatadb_bucket),
    )
    for name, value in settings:
        logMessage(f"{name} {value}", LOGTYPE_INFO)

    try:
        # Fail before the download when there is nowhere to upload to.
        if not metadatadb_bucket:
            raise ValueError(
                "METADATADB_BUCKET environment variable is not set")

        boto3.client('s3').download_file(bucket_name, object_name, file_name)

        # Open the file that was just downloaded.  sqlite3.connect() creates
        # an empty database for a path that does not exist, so a mismatched
        # name would only surface later as a "no such table" error.
        df = _read_meta_table(file_name)

        csv_buffer = StringIO()
        df.to_csv(csv_buffer)
        destination = boto3.resource('s3').Object(
            metadatadb_bucket, f'malware.{file_name}')
        destination.put(Body=csv_buffer.getvalue())
    except Exception as ex:
        # Best-effort job: report the failure instead of crashing.
        logMessage(f"Error occurred: {ex}", LOGTYPE_ERROR)
    print("Python file invoked")


if __name__ == '__main__':
    main()