import os

import boto3
import pandas as pd


def lambda_handler(event, context):
    # Splits the remarks file into chunks and stages them in S3 for Comprehend.

    # Set up the necessary client and resource variables.
    s3client = boto3.client('s3')
    s3 = boto3.resource('s3')
    input_bucket = os.environ['S3Bucket']
    DeploymentType = os.environ['Type']
    input_key = 'inputs/mv_war_main_prop_remark.txt'

    # Change the working directory to '/tmp/', the only writable path in Lambda.
    os.chdir('/tmp/')

    # Split the key prefix from the file name.
    input_file = input_key.split('/')[1]

    # Path where the downloaded file will live inside '/tmp/'.
    input_file_path = '/tmp/' + input_file
    print("File: " + input_file)
    print("File Path: " + input_file_path)

    # Download the input object into '/tmp/' and load it as a DataFrame.
    s3client.download_file(input_bucket, input_key, input_file_path)
    df = pd.read_csv(input_file_path, encoding='latin-1', header=0)

    # List the files currently in /tmp.
    print("OS File List: " + str(os.listdir('/tmp/')))

    # Chunk size: roughly 90 MB worth of rows per output file.
    Limit = 85000
    totalRows = len(df)
    numFiles = totalRows // Limit + 1

    # Counters for the output file number, rows in the current chunk, and row-level errors.
    file_number = 1
    rowTracker = 0
    errors = 0

    if DeploymentType == 'Test':
        # Test deployments write a single small file of about 1% of the rows.
        numFiles = 1
        Limit = totalRows // 100

    print('Expected number of chunk files: ' + str(numFiles))

    def flush_chunk(lines, number):
        # Write the buffered lines to a local file, upload it to S3, then delete the local copy.
        chunk_name = str(number) + '-' + input_file
        with open(chunk_name, 'w+') as fileWriter:
            fileWriter.writelines(lines)
        s3.meta.client.upload_file('/tmp/' + chunk_name, input_bucket,
                                   'model-input/' + chunk_name)
        os.remove('/tmp/' + chunk_name)

    # Iterate through the DataFrame once, flushing a chunk file whenever the row limit is reached.
    toWrite = []
    for index, row in df.iterrows():
        if rowTracker >= Limit:
            flush_chunk(toWrite, file_number)
            rowTracker = 0
            toWrite = []
            file_number += 1
            if DeploymentType == 'Test':
                # Test deployments stop after the first chunk.
                break
        try:
            # Drop any characters that cannot be encoded as UTF-8 and strip newlines
            # so each remark occupies exactly one line in the output file.
            remark = row['TEXT_REMARK'].encode('utf-8', 'ignore').decode('utf-8')
            writeStr = str(row['SN_WAR']) + ',' + remark.replace('\n', ' ').replace('\r', '') + '\n'
            toWrite.append(writeStr)
        except Exception:
            errors += 1
            if errors <= 10:
                print(str(row['TEXT_REMARK']))
        rowTracker += 1

    # Write one last time for the final, partially filled chunk.
    if DeploymentType != 'Test' and toWrite:
        flush_chunk(toWrite, file_number)

    print('Number of Errors: ' + str(errors))
    print('Total length of df: ' + str(len(df)))
    print("OS File List: " + str(os.listdir('.')))
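

# --- Local invocation sketch (not part of the Lambda deployment) ---
# A minimal way to exercise lambda_handler outside AWS, assuming valid AWS
# credentials and that the input object 'inputs/mv_war_main_prop_remark.txt'
# already exists in the target bucket. The bucket name below is a placeholder,
# not the real deployment value; the real values come from the Lambda
# environment variables 'S3Bucket' and 'Type'.
if __name__ == '__main__':
    os.environ.setdefault('S3Bucket', 'my-example-bucket')  # hypothetical bucket name
    os.environ.setdefault('Type', 'Test')                   # 'Test' limits output to one small chunk
    # This handler does not read the event or context, so empty values suffice.
    lambda_handler(event={}, context=None)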