######################################################################
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
# SPDX-License-Identifier: MIT-0                                     #
######################################################################

import os
import torch
import subprocess
import torch.neuron
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import numpy as np
import time
import random
from essential_generators import DocumentGenerator
from common_settings import default_max_length, default_batch_size, default_model_name
import neuronperf
import neuronperf.torch
import json
import boto3

max_length = int(os.getenv('MAX_LENGTH', default_max_length))
batch_size = int(os.getenv('BATCH_SIZE', default_batch_size))
model_name = (os.getenv('MODEL_NAME', default_model_name))
os.environ['TOKENIZERS_PARALLELISM'] = 'False'
num_request_samples = 10 # Total number of samples to generate

# Neuron file name
neuron_model_file = '%s_inf_%d_%d.pt'%(model_name, max_length, batch_size)

# Get tokenizer and create encoded inputs
tokenizer = AutoTokenizer.from_pretrained(model_name)
gen = DocumentGenerator()
sequence_list, encoded_input_list = [], []
for _ in np.arange(num_request_samples):
    sequence = gen.sentence()
    encoded_inputs = tokenizer.encode_plus(sequence, max_length=max_length, 
            padding='max_length', truncation=True, return_tensors='pt')
    sequence_list.append(sequence)
    encoded_input_list.append(encoded_inputs)

# Prepare example_inputs Tensor
input_id_list, attention_mask_list = [], []
for _ in range(batch_size):
    tmp_i = random.choice(encoded_input_list)
    input_id_list.append(tmp_i['input_ids'])
    attention_mask_list.append(tmp_i['attention_mask'])
batch_input_ids_tensor = torch.cat(input_id_list)
batch_attention_mask_tensor = torch.cat(attention_mask_list)
example_inputs = batch_input_ids_tensor, batch_attention_mask_tensor

# Perform Neuronperf benchmarking, save results to S3
results = neuronperf.torch.benchmark(neuron_model_file, example_inputs, [batch_size], n_models=[4]) # FIXME: n_models should not be hard-coded
print(json.dumps(results, indent=2))

bucket = os.environ.get('S3_BUCKET_NAME', None)
if bucket:
    fname = f"neuronperf_results_{model_name}_inf_{max_length}_{batch_size}.json"
    s3 = boto3.client("s3")
    s3.put_object(Bucket=bucket, Key=fname, Body=json.dumps(results, indent=2).encode())