import os

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (LSTM, Bidirectional, Dense, Layer,
                                     LayerNormalization)

src_bucket = os.getenv("BUCKET_NAME")

EMBEDDING_DIM = 200
MAX_SEQUENCE_LENGTH = 150
MAX_NB_WORDS = 40000


class attention(Layer):
    '''
    Attention layer that scores each timestep with a single-unit MLP and
    returns either the attention-weighted sequence or its weighted sum.
    '''
    def __init__(self, return_sequences=True, activation=None):
        self.return_sequences = return_sequences
        self.activation = tf.keras.activations.get(activation)
        super(attention, self).__init__()

    def build(self, input_shape):
        # One weight per feature dimension and one bias per timestep.
        self.W = self.add_weight(name="att_weight",
                                 shape=(input_shape[-1], 1),
                                 initializer="random_normal")
        self.b = self.add_weight(name="att_bias",
                                 shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(attention, self).build(input_shape)

    def call(self, x):
        # Attention scores, normalised over the time axis.
        e = self.activation(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        return K.sum(output, axis=1)


class MultiLabelPrecision(tf.keras.metrics.Metric):
    '''
    Custom metric: precision accumulated across all labels of a
    multi-label problem (true positives / all actual positives).
    '''
    def __init__(self, name='MultiLabelPrecision', **kwargs):
        super(MultiLabelPrecision, self).__init__(name=name, **kwargs)
        self.tp = tf.Variable(0, dtype='float32')
        self.trues = tf.Variable(0, dtype='float32')

    def update_state(self, y_true, y_pred, sample_weight=None):
        ## convert to a 6 x None matrix
        corrects = tf.transpose(tf.cast(y_true, 'float32'))
        preds = tf.transpose(tf.math.round(y_pred))
        ## convert to booleans
        booltrue = tf.equal(corrects, tf.constant(1.0))
        boolpred = tf.equal(preds, tf.constant(1.0))
        ## logical AND to get true positives, including multi-label rows
        self.tp.assign_add(tf.reduce_sum(tf.cast(tf.math.logical_and(booltrue, boolpred), 'float32')))
        ## sum to get all actual positives
        self.trues.assign_add(tf.math.reduce_sum(corrects))

    def result(self):
        # divide_no_nan avoids NaN before any positive labels have been seen
        return tf.math.divide_no_nan(self.tp, self.trues)

    def reset_states(self):
        self.tp.assign(0)
        self.trues.assign(0)


def define_network(embedding_layer):
    '''
    Define a bidirectional LSTM network with an attention layer and a
    6-unit sigmoid output for multi-label classification.
    '''
    sequence_input = tf.keras.Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')

    ## If no pretrained embedding layer is given, train one from scratch
    if embedding_layer == "none":
        embedded_sequences = tf.keras.layers.Embedding(
            MAX_NB_WORDS, EMBEDDING_DIM,
            input_length=MAX_SEQUENCE_LENGTH)(sequence_input)
    else:
        embedded_sequences = embedding_layer(sequence_input)

    lstm = Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2,
                              return_sequences=True))(embedded_sequences)
    lstm = LayerNormalization()(lstm)
    attentionlstm = attention(return_sequences=False, activation='tanh')(lstm)
    s = Dense(6, activation='sigmoid')(attentionlstm)

    model_LSTM = tf.keras.Model(inputs=[sequence_input], outputs=[s])
    print(model_LSTM.summary())
    return model_LSTM
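

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original module):
# builds the network without a pretrained embedding layer, compiles it with
# the custom MultiLabelPrecision metric, and fits one epoch on synthetic
# token ids / labels. The optimizer, loss, batch size, and random data below
# are assumptions for demonstration; real inputs would come from a tokenized
# corpus padded to MAX_SEQUENCE_LENGTH.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    model = define_network("none")
    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=[MultiLabelPrecision()])

    # Synthetic batch: 32 sequences of token ids and 6 binary labels each.
    x_dummy = np.random.randint(1, MAX_NB_WORDS, size=(32, MAX_SEQUENCE_LENGTH))
    y_dummy = np.random.randint(0, 2, size=(32, 6)).astype("float32")

    model.fit(x_dummy, y_dummy, epochs=1, batch_size=8, verbose=1)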