'''
    Batch step 1: using the IDs produced by the initial lambda, 'get' the data that belongs to each of them.
    In this example the data is randomly generated;
    in a real pipeline you would fetch it from somewhere else, e.g. a DB.
'''
import os
import random
import json


def get_id_data(start_timestamp, *, count=500, step=60000 * 60):
    '''
    Just generating data here, but otherwise you would get it from elsewhere in your cloud setup

    Builds `count` rows. Row number i (counting from 1) has the timestamp
    `start_timestamp + i * step`, so the first row is already one step after
    `start_timestamp`. Each row also carries a random integer `value`
    in the range 10..19999.

    Args:
        start_timestamp: numeric base that the timestamps are offset from.
        count: number of rows to generate (default 500). Zero or a negative
            number yields an empty list.
        step: spacing between consecutive timestamps (default 60000 * 60;
            presumably milliseconds, i.e. one hour -- confirm against the consumer).

    Returns:
        A list of dicts with the keys 'timestamp' and 'value'.
    '''
    return [
        {
            'timestamp': start_timestamp + (position * step),
            'value': random.randrange(10, 20000)
        }
        for position in range(1, count + 1)
    ]


def run():
    '''
    Same pattern as before. Get the Ids from EFS that have been provisioned,
    then write one JSON file of generated data per Id.

    The working directory is ./batch/<STEP_FN_ID>/<AWS_BATCH_JOB_ARRAY_INDEX>,
    taken from the environment; a missing or empty variable falls back to
    'unknown' and '0' respectively. Ids are read from <working_dir>/prep/data.csv
    (one per line) and each non-empty Id gets a file <working_dir>/data/<id>.json.

    Raises:
        OSError: if the prep file cannot be read, or the data directory or an
            output file cannot be created or written.
    '''
    guid = os.environ.get('STEP_FN_ID') or 'unknown'
    index = os.environ.get('AWS_BATCH_JOB_ARRAY_INDEX') or '0'

    # No trailing slash, so the derived paths below do not contain '//'.
    working_dir = f"./batch/{guid}/{index}"

    # use this pattern to test locally
    # working_dir = "test-data/1"

    prep_dir = f"{working_dir}/prep"
    data_dir = f"{working_dir}/data"

    # Read all Ids up front and close the handle straight away.
    with open(f'{prep_dir}/data.csv', 'r', encoding='utf-8') as ids_file:
        record_ids = [line.rstrip('\n') for line in ids_file]

    # Fixed base for the generated timestamps (presumably epoch milliseconds).
    start_timestamp = 1663639968158

    # exist_ok: a re-run for the same guid/index must not fail on the existing directory.
    os.makedirs(data_dir, exist_ok=True)

    for record_id in record_ids:
        if not record_id:
            # A blank line would otherwise produce a file literally named '.json'.
            continue
        line_data = get_id_data(start_timestamp)
        with open(f'{data_dir}/{record_id}.json', 'w', encoding='utf-8') as file_handler:
            json.dump(line_data, file_handler)


# Module-level entry point: there is no __main__ guard, so this call runs
# whenever the file is executed as a script and also whenever it is imported.
run()