######################################################################
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
# SPDX-License-Identifier: MIT-0                                     #
######################################################################

import os
import platform
import importlib
from configparser import ConfigParser

import torch

machine = platform.uname().machine
device_type = 'cpu'
if machine == 'aarch64':
    device_type = 'arm'

try:
    import torch_neuron
    device_type = 'inf1'
except ImportError:
    print('[WARN] Torch Neuron not found')

try:
    import torch_neuronx
    device_type = 'inf2'
except ImportError:
    print('[WARN] Torch Neuronx not found')

# 1. READ config.properties
print("\nParsing configuration ...")
path_prefix = os.getcwd()
with open(path_prefix + '/../config.properties') as f:
    # ConfigParser requires a section header; prepend a synthetic [global] one
    config_lines = '[global]\n' + f.read()
config = ConfigParser()
config.read_string(config_lines)

model_name = config['global']['huggingface_model_name']
tokenizer_class_name = config['global']['huggingface_tokenizer_class']
model_class_name = config['global']['huggingface_model_class']
sequence_length = int(config['global']['sequence_length'])
processor = config['global']['processor']
pipeline_cores = config['global']['pipeline_cores']
batch_size = int(config['global']['batch_size'])
test = config['global']['test']

question = "What does the little engine say?"

context = """In the children's story about the little engine, a small locomotive is pulling a large load up a mountain.
Since the load is heavy and the engine is small, it is not sure whether it will be able to do the job. This is a story
about how an optimistic attitude empowers everyone to achieve more. In the story, the little engine says: 'I think I can'
as it is pulling the heavy load all the way to the top of the mountain. On the way down it says: 'I thought I could.'"""

# 2. LOAD PRE-TRAINED MODEL
print(f'\nLoading pre-trained model: {model_name}')
transformers = importlib.import_module("transformers")
tokenizer_class = getattr(transformers, tokenizer_class_name)
model_class = getattr(transformers, model_class_name)
tokenizer = tokenizer_class.from_pretrained(model_name)
model = model_class.from_pretrained(model_name, return_dict=False)

# 3. TOKENIZE THE INPUT
print('\nTokenizing input sample ...')
inputs = tokenizer.encode_plus(question,
                               context,
                               return_tensors="pt",
                               max_length=sequence_length,
                               padding='max_length',
                               truncation=True)

if device_type not in ['inf1', 'inf2']:
    if torch.cuda.is_available():
        device = torch.device("cuda")
        device_type = "gpu"
        model.to(device)
        inputs = inputs.to(device)
    else:
        device = torch.device("cpu")

if device_type == processor:
    print(f" ... Using device: {device_type}")
else:
    print(f"[WARN] detected device_type ({device_type}) does not match the configured processor ({processor})")

# 4. COMPILE THE MODEL
print('\nTracing model ...')
example_inputs = (
    torch.cat([inputs['input_ids']] * batch_size, 0),
    torch.cat([inputs['attention_mask']] * batch_size, 0)
)
os.makedirs(f'traced-{model_name}', exist_ok=True)
torch.set_num_threads(6)
# Match on the exact processor name: 'inf' is a prefix of both 'inf1' and
# 'inf2', so a substring test would never reach the inf2 branch.
if processor == 'inf1':
    model_traced = torch.neuron.trace(model,
                                      example_inputs,
                                      verbose=1,
                                      compiler_workdir=f'./traced-{model_name}/compile_wd_{processor}_bs{batch_size}_seq{sequence_length}_pc{pipeline_cores}',
                                      compiler_args=['--neuroncore-pipeline-cores', str(pipeline_cores)])
elif processor == 'inf2':
    model_traced = torch_neuronx.trace(model, example_inputs)
else:
    model_traced = torch.jit.trace(model, example_inputs)
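
# A minimal parity check (illustrative sketch, not part of the original flow):
# on cpu/gpu targets the traced model can be compared against the eager model
# on the same example inputs. Neuron-compiled models must run on the matching
# accelerator, so this check is left commented out:
#
# with torch.no_grad():
#     eager_logits = model(*example_inputs)
#     traced_logits = model_traced(*example_inputs)
# print('Max start-logit delta:',
#       (eager_logits[0] - traced_logits[0]).abs().max().item())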
# 5. TEST THE COMPILED MODEL (Optional)
if test.lower() == 'true':
    print("\nTesting traced model ...")
    print(f"Question: {question}")

    # Run the traced model and decode the answer span from the start/end logits
    answer_logits = model_traced(*example_inputs)
    answer_start = answer_logits[0].argmax().item()
    answer_end = answer_logits[1].argmax().item() + 1
    if answer_end > answer_start:
        answer_txt = tokenizer.convert_tokens_to_string(
            tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))
    else:
        answer_txt = tokenizer.convert_tokens_to_string(
            tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:]))
    print(f'Model Answer: {answer_txt}')

# 6. SAVE THE COMPILED MODEL
print('\nSaving traced model ...')
model_path = f'./traced-{model_name}/{model_name}_bs{batch_size}_seq{sequence_length}_pc{pipeline_cores}_{processor}.pt'
model_traced.save(model_path)
print(f'Done. Model saved as: {model_path}')
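
# Re-loading the saved artifact later (a minimal sketch; `model_path` as above).
# For Neuron targets, torch_neuron / torch_neuronx must be imported before
# torch.jit.load so the Neuron runtime ops are registered:
#
# import torch
# loaded_model = torch.jit.load(model_path)
# start_logits, end_logits = loaded_model(*example_inputs)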