#!/bin/bash -e

# Get sorted list of all input file names
SORTED_FILELIST=($(find $INPUT_DIR -type f | sort))

# Calculate number of files for this worker to process:
# ceiling(length(SORTED_FILELIST) / NUMBER_OF_WORKERS)
BATCH_SIZE=$(((${#SORTED_FILELIST[@]} + NUMBER_OF_WORKERS - 1) / NUMBER_OF_WORKERS))

# Get list of files for this worker to process
FILES_TO_PROCESS=(${SORTED_FILELIST[@]:$((AWS_BATCH_JOB_ARRAY_INDEX * BATCH_SIZE)):$BATCH_SIZE})

# Worker output directory
WORKER_OUTPUT_DIR="${OUTPUT_DIR}/${AWS_BATCH_JOB_ID}"
mkdir -p $WORKER_OUTPUT_DIR

echo "worker $(( AWS_BATCH_JOB_ARRAY_INDEX + 1 )) of ${NUMBER_OF_WORKERS}, processing ${#FILES_TO_PROCESS[@]} files"

for input_file in ${FILES_TO_PROCESS[@]}
do
  output_file="${WORKER_OUTPUT_DIR}/$(basename $input_file)"
  if [[ -f $output_file ]]
  then
    echo "output file $output_file already exists, skipping..."
    continue
  fi
  echo "processing $input_file"
  python predict.py $input_file $output_file
done
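
# --- Usage sketch (an assumption, not part of the original script) ---
# The script expects these environment variables:
#   INPUT_DIR                  - directory containing the input files
#   OUTPUT_DIR                 - directory where results are written
#   NUMBER_OF_WORKERS          - total number of array child jobs
#   AWS_BATCH_JOB_ARRAY_INDEX  - set automatically by AWS Batch for array child jobs (0-based)
#   AWS_BATCH_JOB_ID           - set automatically by AWS Batch
#
# A hypothetical submission as an AWS Batch array job of 4 workers; the job
# name, queue, job definition, and mount paths below are placeholders:
#
#   aws batch submit-job \
#     --job-name predict-batch \
#     --job-queue my-job-queue \
#     --job-definition my-predict-job-definition \
#     --array-properties size=4 \
#     --container-overrides '{"environment":[
#         {"name":"INPUT_DIR","value":"/mnt/input"},
#         {"name":"OUTPUT_DIR","value":"/mnt/output"},
#         {"name":"NUMBER_OF_WORKERS","value":"4"}]}'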