import json
import logging
import os
import sys

import torch
import torch.utils.data
import torch.utils.data.distributed
from transformers import BertTokenizer

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))

MAX_LEN = 64  # maximum sequence length (in tokens) fed to the traced model

print("Loading BERT tokenizer...")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)


def model_fn(model_dir):
    """Load the TorchScript-traced BERT model from model_dir."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("================== objects in model_dir =====================")
    print(os.listdir(model_dir))
    loaded_model = torch.jit.load(os.path.join(model_dir, "traced_bert.pt"))
    print("================== model loaded =============================")
    return loaded_model.to(device)


def input_fn(request_body, request_content_type):
    """Deserialize a JSON request body into padded token IDs and an attention mask."""
    if request_content_type == "application/json":
        data = json.loads(request_body)
        print("================ input sentences ===============")
        print(data)

        if isinstance(data, str):
            data = [data]
        elif isinstance(data, list) and len(data) > 0 and isinstance(data[0], str):
            pass
        else:
            raise ValueError(
                "Unsupported input type. Input must be a string or a non-empty "
                "list of strings. Got: {}".format(data)
            )

        # For backward compatibility with older transformers versions, encode
        # each sentence individually instead of passing the whole batch to the
        # tokenizer in one call; see
        # https://github.com/huggingface/transformers/issues/5580
        input_ids = [tokenizer.encode(x, add_special_tokens=True) for x in data]
        print("================ encoded sentences ==============")
        print(input_ids)

        # Pad shorter sentences with zeros up to MAX_LEN
        padded = torch.zeros(len(input_ids), MAX_LEN)
        for i, p in enumerate(input_ids):
            p = p[:MAX_LEN]  # truncate sequences longer than MAX_LEN to avoid a size mismatch
            padded[i, : len(p)] = torch.tensor(p)

        # Attention mask: 1 for real tokens, 0 for padding
        # (token ID 0 is [PAD] in the bert-base-uncased vocabulary)
        mask = (padded != 0)
        print("================= padded input and attention mask ================")
        print(padded, "\n", mask)
        return padded.long(), mask.long()

    raise ValueError("Unsupported content type: {}".format(request_content_type))


def predict_fn(input_data, model):
    """Run inference, preferring the Elastic Inference accelerator when one is attached."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    input_id, input_mask = input_data
    input_id = input_id.to(device)
    input_mask = input_mask.to(device)

    with torch.no_grad():
        try:
            # Target the attached Elastic Inference accelerator. On stock
            # PyTorch, optimized_execution does not accept the second
            # argument and raises TypeError, so we fall back to CPU/GPU.
            with torch.jit.optimized_execution(True, {"target_device": "eia:0"}):
                print("==================== using elastic inference ====================")
                y = model(input_id, attention_mask=input_mask)[0]
        except TypeError:
            y = model(input_id, attention_mask=input_mask)[0]

    print("==================== inference result =======================")
    print(y)
    return y
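
# ---------------------------------------------------------------------------
# Local smoke test: a minimal sketch, not part of the SageMaker entry points.
# It exercises the same model_fn -> input_fn -> predict_fn path that the
# SageMaker PyTorch inference toolkit invokes on the endpoint. It assumes a
# traced model has already been saved as traced_bert.pt; the local directory
# name "model" and the sample sentences below are assumptions made purely
# for illustration.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    model_dir = "model"  # hypothetical local directory containing traced_bert.pt
    model = model_fn(model_dir)

    request_body = json.dumps(["Hello, world!", "BERT inference on SageMaker."])
    inputs = input_fn(request_body, "application/json")
    logits = predict_fn(inputs, model)
    print("logits shape:", tuple(logits.shape))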