import os
import numpy as np
import pandas as pd
import argparse

from gensim.models import Word2Vec
from gensim.models.callbacks import CallbackAny2Vec

from config import RANDOM_STATE, MAX_LENGTH, EPOCHS, BATCH_SIZE, EMBEDDING_DIM

# Import tensorflow lib
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Dropout, Embedding, Conv1D, concatenate, MaxPooling1D, Lambda
from tensorflow import keras
from tensorflow.keras import backend as K

# SageMaker input channels
SM_CHANNEL_EMBEDDING = '/opt/ml/input/data/embeddings'
SM_CHANNEL_TRAIN_STATIC = '/opt/ml/input/data/train-static'
SM_CHANNEL_VALID_STATIC = '/opt/ml/input/data/valid-static'

# Convolution hyperparameters; these override the values defined in config
filters = 3
kernel_size = 3


def f1(y_true, y_pred):
    """Calculate the F1 score from the precision/recall metrics defined
    within this function.

    Args:
        y_true (tensor): the true labels of the data
        y_pred (tensor): the predicted labels of the data

    Returns:
        tensor: batch-wise F1 score
    """
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall. Computes the recall,
        a metric for multi-label classification of how many relevant items
        are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision. Computes the
        precision, a metric for multi-label classification of how many
        selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))


METRICS = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.BinaryAccuracy(name='accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
    f1
]

early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_auc',
    verbose=1,
    patience=20,
    mode='max',
    restore_best_weights=True)


def _get_class_weight(labels):
    """Compute balanced class weights so that, on average, each class
    contributes equally to the weighted loss despite class imbalance."""
    pos = sum(labels)
    total = len(labels)
    neg = total - pos
    weight_for_0 = (1 / neg) * total / 2.0
    weight_for_1 = (1 / pos) * total / 2.0
    class_weight = {0: weight_for_0, 1: weight_for_1}
    return class_weight
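# Worked example for _get_class_weight (a sketch with assumed counts, not part
# of the training flow): given 1,000 labels of which 100 are positive,
# pos = 100, neg = 900, total = 1000, so
#     weight_for_0 = (1 / 900) * 1000 / 2.0 ≈ 0.56
#     weight_for_1 = (1 / 100) * 1000 / 2.0 = 5.0
# i.e. _get_class_weight([1] * 100 + [0] * 900) -> {0: 0.5555..., 1: 5.0}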
def cnn_model(embedding_matrix, vocab_len, emb_dim, em_input_len, static_feature_dim):
    """Define the CNN model structure. Please refer to the reports for a
    visualization of the CNN model.

    Args:
        embedding_matrix (numpy.array): the embedding matrix from gensim
        vocab_len (int): vocabulary length, the number of unique medical events
            available in the embeddings
        emb_dim (int): embedding dimensions, default value: 300
        em_input_len (int): input length of the medical events, the same value
            as max_length since all event sequences are pre-padded to that length
        static_feature_dim (int): number of static (statistical) features,
            including labs, comorbidities, and hospitalizations

    Returns:
        tf.keras.Model: model object
    """
    seq_input = keras.Input(shape=(None,), name='event_seq')
    static_input = keras.Input(shape=(static_feature_dim,), name='static')

    # Embed each event in the sequence into an emb_dim-dimensional vector
    seq_features = Embedding(vocab_len, emb_dim, weights=[embedding_matrix],
                             input_length=em_input_len, trainable=False)(seq_input)
    seq_features = Conv1D(filters, kernel_size)(seq_features)
    # A Conv1D with 'valid' padding outputs em_input_len - kernel_size + 1 time
    # steps, so this pooling window collapses the sequence to a single step
    # (the original used `filters` here, which only worked because
    # filters == kernel_size)
    seq_features = MaxPooling1D(pool_size=em_input_len - kernel_size + 1, strides=1)(seq_features)
    seq_features = Lambda(lambda s: K.squeeze(s, axis=1))(seq_features)
    seq_features = Dense(16, activation='relu')(seq_features)
    seq_features = Dropout(0.5)(seq_features)

    # Merge all available features into a single large vector via concatenation
    x = concatenate([seq_features, static_input])
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    # Create model
    model = keras.Model(inputs=[seq_input, static_input], outputs=output)
    return model
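# Minimal shape check for cnn_model (a sketch; the vocabulary size, embedding
# dimension and static feature count below are assumptions, not values taken
# from the training data):
#
#     emb = np.random.rand(5000, 100)          # stand-in for the gensim matrix
#     model = cnn_model(emb, vocab_len=5000, emb_dim=100,
#                       em_input_len=150, static_feature_dim=20)
#     model.summary()
#
# With kernel_size = 3, the Conv1D output has 150 - 3 + 1 = 148 time steps, so
# the pooling window of em_input_len - kernel_size + 1 reduces it to one step.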
def make_model(X_train, X_train_static, y_train, X_valid, X_valid_static, y_valid,
               embedding_matrix, max_length, static_f_dim, class_weight,
               epochs, batch_size, learning_rate):
    """Build the model, define the optimizer, compile the model and start training.

    Args:
        X_train (numpy.array): training data features
        X_train_static (numpy.array): the training data's static/statistical
            features, such as labs, comorbidities, and hospitalizations
        y_train (numpy.array): labels of the training data
        X_valid (numpy.array): validation data features
        X_valid_static (numpy.array): the validation data's static/statistical
            features, such as labs, comorbidities, and hospitalizations
        y_valid (numpy.array): labels of the validation data
        embedding_matrix (numpy.array): the embedding matrix generated by gensim
        max_length (int): max length of the sequence of medical events
        static_f_dim (int): number of static (statistical) features, including
            labs, comorbidities, and hospitalizations
        class_weight (dict): dictionary of class weights generated by
            _get_class_weight(), used to deal with class imbalance
        epochs (int): number of training epochs, default is 50
        batch_size (int): batch size, default is 512
        learning_rate (float): learning rate of the model, default is 0.001

    Returns:
        tf.keras.Model: a fitted model
    """
    model = cnn_model(embedding_matrix, embedding_matrix.shape[0],
                      embedding_matrix.shape[1], max_length, static_f_dim)
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=METRICS)
    weighted_history = model.fit(
        {'event_seq': X_train, 'static': X_train_static},
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[early_stopping],
        validation_data=({'event_seq': X_valid, 'static': X_valid_static}, y_valid),
        class_weight=class_weight)
    return model


def _load_training_data(base_dir, code_to_idx_map, unknown_idx, max_length, target_col):
    """Load the training CSVs, encode each event sequence as embedding indices
    and pre-pad it to max_length."""
    train_df = pd.read_csv(os.path.join(base_dir, 'train.csv'))
    encoded_seq = train_df['events'].apply(
        lambda x: [code_to_idx_map.get(i, unknown_idx) for i in x.split(' ')])
    X_train = pad_sequences(encoded_seq, maxlen=max_length, padding='pre')
    y_train = train_df[target_col].values
    X_train_static = pd.read_csv(os.path.join(base_dir, 'train-static.csv')).values
    return X_train, X_train_static, y_train


def _load_validation_data(base_dir, code_to_idx_map, unknown_idx, max_length, target_col):
    """Load the validation CSVs, encode each event sequence as embedding indices
    and pre-pad it to max_length."""
    valid_df = pd.read_csv(os.path.join(base_dir, 'validation.csv'))
    encoded_seq = valid_df['events'].apply(
        lambda x: [code_to_idx_map.get(i, unknown_idx) for i in x.split(' ')])
    X_valid = pad_sequences(encoded_seq, maxlen=max_length, padding='pre')
    y_valid = valid_df[target_col].values
    X_valid_static = pd.read_csv(os.path.join(base_dir, 'valid-static.csv')).values
    return X_valid, X_valid_static, y_valid
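# Example of the encoding step shared by both loaders (the event codes are
# hypothetical): with code_to_idx_map = {'E11': 0, 'I10': 1} and
# unknown_idx = 2, the event string 'E11 XXX I10' is encoded as [0, 2, 1]
# (the unseen code 'XXX' maps to unknown_idx), and
# pad_sequences([[0, 2, 1]], maxlen=5, padding='pre') left-pads it with zeros
# to [[0, 0, 0, 2, 1]].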
class EpochLogger(CallbackAny2Vec):
    """EpochLogger is used during unsupervised embedding training to monitor
    progress. This class also needs to be imported when reading the trained
    embedding matrix."""

    def __init__(self):
        self.epoch = 0

    def on_epoch_begin(self, model):
        print("Epoch #{} start".format(self.epoch))

    def on_epoch_end(self, model):
        print("Epoch #{} end".format(self.epoch))
        self.epoch += 1


def _load_embeddings(base_dir, embedding_dim=300):
    """Load the gensim Word2Vec model and build the embedding matrix together
    with the code <-> index lookup maps, reserving an extra all-zero row for
    unknown ('UNK') codes."""
    w2v_model = Word2Vec.load(os.path.join(base_dir, 'W2V_event_dim100_win100.model'))
    vocab = w2v_model.wv.key_to_index.keys()
    # Use the dimensionality of the loaded vectors rather than the argument,
    # in case they differ (the saved model's file name suggests 100 dimensions)
    embedding_dim = w2v_model.wv.vector_size
    code_to_idx_map = {}
    idx_to_code_map = {}
    embedding_matrix = np.zeros(shape=(len(vocab), embedding_dim))
    for idx, code in enumerate(vocab):
        code_to_idx_map[code] = idx
        idx_to_code_map[idx] = code
        # Copy the learned vector for this code (the original assigned
        # w2v_model.wv.key_to_index[code], which is the integer index)
        embedding_matrix[idx, :] = w2v_model.wv[code]
    # Reserve one extra index, with an all-zero embedding row, for unknown codes
    unknown_idx = len(vocab)
    idx_to_code_map[unknown_idx] = 'UNK'
    code_to_idx_map['UNK'] = unknown_idx
    embedding_matrix = np.append(embedding_matrix,
                                 np.zeros((1, embedding_matrix.shape[1])),
                                 axis=0)
    return embedding_matrix, idx_to_code_map, code_to_idx_map, unknown_idx


def _parse_args():
    parser = argparse.ArgumentParser()

    # Data, model, and output directories
    parser.add_argument('--model_dir', type=str)
    parser.add_argument('--sm-model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--training', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
    parser.add_argument('--validation', type=str, default=os.environ['SM_CHANNEL_VALIDATION'])

    # Hyperparameters
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--batch-size', type=int, default=512)
    parser.add_argument('--learning-rate', type=float, default=1e-3)
    parser.add_argument('--max-length', type=int, default=150)
    parser.add_argument('--static-f-dim', type=int, default=0)
    parser.add_argument('--target-col', type=str, default='target')

    return parser.parse_known_args()


if __name__ == "__main__":
    args, _ = _parse_args()

    embedding_matrix, idx_to_code_map, code_to_idx_map, unknown_idx = _load_embeddings(args.training)
    X_train, X_train_static, y_train = _load_training_data(
        args.training, code_to_idx_map, unknown_idx, args.max_length, args.target_col)
    X_valid, X_valid_static, y_valid = _load_validation_data(
        args.validation, code_to_idx_map, unknown_idx, args.max_length, args.target_col)

    class_weight = _get_class_weight(y_train)

    tf_model = make_model(X_train, X_train_static, y_train,
                          X_valid, X_valid_static, y_valid,
                          embedding_matrix, args.max_length, args.static_f_dim,
                          class_weight, args.epochs, args.batch_size, args.learning_rate)

    version = 1
    export_path = os.path.join(args.sm_model_dir, str(version))
    tf_model.save(
        export_path,
        overwrite=True,
        include_optimizer=True,
        save_format='tf'
    )
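# Example local invocation (a sketch; the paths, script name and static feature
# count are assumptions — inside a SageMaker training job the SM_* environment
# variables are set automatically):
#
#     SM_MODEL_DIR=/tmp/model \
#     SM_CHANNEL_TRAINING=/tmp/data/train \
#     SM_CHANNEL_VALIDATION=/tmp/data/validation \
#     python train.py --epochs 50 --batch-size 512 --max-length 150 --static-f-dim 20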