# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

# Functions to map NERC predictions back to their original sentences, which were split
# because of BERT's limited input size

def map_split_preds_to_idx(split_preds, split_idx):
    """
    Maps back the sentences being split
    (due to bert input size constraints) and their predictions,
    to the original sentence index

    Consecutive entries of ``split_idx`` holding the same value are treated as
    pieces of one sentence: their predictions are concatenated in order.
    The input lists are never modified; every returned list is a new object.

    :param split_preds: list of lists of tag predictions for each word, one inner list per split piece
    :param split_idx: list with, for each split piece, the index of the sentence it comes from
    :return: tuple (list of grouped predictions for each initial sentence, list of the respective
        initial sentence ids); two empty lists when ``split_idx`` is empty
    """

    idx = []
    flat_preds = []

    for position, sentence_id in enumerate(split_idx):
        # A new group starts on the first piece and whenever the id differs from the previous one
        if position == 0 or sentence_id != split_idx[position - 1]:
            flat_preds.append([])
            idx.append(sentence_id)
        # extend() copies the items into our own list, so the caller's inner lists are
        # never aliased or mutated (and array-like pieces are concatenated, not added)
        flat_preds[-1].extend(split_preds[position])

    return flat_preds, idx


def y2label(y_pred, int2tag, mask=0):
    """
    Converts integer tag predictions into their tag names, dropping masked positions

    :param y_pred: list of lists of tag predictions as integers for each word in a sentence
    :param int2tag: (dict) mapping looked up with the string form of each integer (``str(token)``)
    :param mask: (int) integer corresponding to '-PAD-' tag; tokens equal to it are skipped
    :return: list of lists of tag predictions as real tag (not integer)
    """
    return [
        [int2tag[str(label_id)] for label_id in sentence if label_id != mask]
        for sentence in y_pred
    ]


def preds_to_dict_single(sent, y_pred_tags):
    """
    Postprocessing to format the tag predictions of one sentence into a dictionary
    of {tag: corresponding words}.

    Words are grouped by their predicted tag in order of appearance, the "O" tag is
    discarded, and the remaining tag names are lower-cased with spaces replaced by
    underscores.

    :param sent: (list) words of the sentence
    :param y_pred_tags: (list) predicted tag for each word, paired with ``sent`` position by position
    :return: dictionary of {normalised tag: list of words carrying that tag}
    """
    words_by_tag = {}
    for token, label in zip(sent, y_pred_tags):
        # Create the bucket on first sight of the tag, then add the word to it
        words_by_tag.setdefault(label, []).append(token)

    # Words tagged "O" are not reported
    words_by_tag.pop("O", None)

    return {
        label.lower().replace(' ', '_'): list(tokens)
        for label, tokens in words_by_tag.items()
    }

# Used for reporting test performance
def y2label_for_report(y_true, y_pred, int2tag, mask=0):
    """
    Flattens true and predicted integer tags and converts them to tag names,
    skipping every position whose true tag equals the mask

    :param y_true: true tags as integers; must expose a ``.flat`` iterator
        (presumably a numpy array -- confirm against the caller)
    :param y_pred: predicted tags as integers, same requirements as ``y_true``
    :param int2tag: (dict) mapping looked up directly with the integer tag values
    :param mask: (int) true-tag value marking positions to skip
    :return: tuple (flat list of true tag names, flat list of predicted tag names)
    """
    true_tags, pred_tags = [], []
    for gold, guess in zip(y_true.flat, y_pred.flat):
        # Only the true label decides whether a position is kept
        if gold == mask:
            continue
        true_tags.append(int2tag[gold])
        pred_tags.append(int2tag[guess])
    return true_tags, pred_tags