import mxnet as mx
from mxnet import ndarray as F
import numpy as np
import json
import os

class SpatialDropout(mx.operator.CustomOp):
    """Custom operator that zeroes whole channels of its input during training.

    In training mode a random 0/1 mask of shape ``(1, num_filters, 1, 1)`` is
    drawn, rescaled by ``1 / (1 - p)`` and broadcast-multiplied with the input.
    In inference mode the input is passed through unchanged.

    NOTE(review): the mask shape implies a 4-D input with ``num_filters``
    channels on axis 1 -- confirm against the callers of this operator.
    """

    def __init__(self, p, num_filters, ctx):
        """Store the operator configuration.

        Args:
            p: Drop probability; anything accepted by ``float()``.
                Must satisfy ``0 <= p < 1``.
            num_filters: Size of the mask along axis 1; anything accepted
                by ``int()``.
            ctx: Device context on which the masks are allocated.

        Raises:
            ValueError: If ``p`` is outside ``[0, 1)``; ``p == 1`` would make
                the ``1 / (1 - p)`` rescaling divide by zero.
        """
        super(SpatialDropout, self).__init__()
        self._p = float(p)
        if not 0.0 <= self._p < 1.0:
            raise ValueError(
                'SpatialDropout: p must be in [0, 1), got %r' % (p,))
        self._num_filters = int(num_filters)
        self._ctx = ctx
        # Mask that leaves the gradient untouched; used until a training
        # forward pass has run and whenever forward runs in inference mode.
        self._identity_mask = F.ones(shape=(1, 1, 1, 1), ctx=self._ctx)
        self._spatial_dropout_mask = self._identity_mask

    def forward(self, is_train, req, in_data, out_data, aux):
        """Apply dropout when training, otherwise copy the input to the output."""
        x = in_data[0]
        if not is_train:
            # Forward is the identity here, so a subsequent backward must be
            # the identity too rather than reuse a stale training mask.
            self._spatial_dropout_mask = self._identity_mask
            self.assign(out_data[0], req[0], x)
            return

        mask_shape = (1, self._num_filters, 1, 1)
        # 1 where the uniform sample exceeds p (channel kept), 0 otherwise.
        keep = F.broadcast_greater(
            F.random_uniform(low=0, high=1, shape=mask_shape, ctx=self._ctx),
            F.ones(shape=mask_shape, ctx=self._ctx) * self._p,
            ctx=self._ctx
        )
        # Keep the *scaled* mask so that backward applies exactly the same
        # per-channel factor that forward applied.
        self._spatial_dropout_mask = keep / (1 - self._p)
        y = F.broadcast_mul(x, self._spatial_dropout_mask, ctx=self._ctx)
        self.assign(out_data[0], req[0], y)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Propagate the gradient through the mask used by the last forward."""
        dy = out_grad[0]
        # The stored mask already includes the 1 / (1 - p) factor, so this is
        # the exact derivative of the forward computation.
        dx = F.broadcast_mul(self._spatial_dropout_mask, dy)
        self.assign(in_grad[0], req[0], dx)
        
@mx.operator.register('spatial_dropout')
class SpatialDropoutProp(mx.operator.CustomOpProp):
    """Property class registered under the name ``'spatial_dropout'``.

    Holds the ``p`` and ``num_filters`` settings and hands them to a new
    ``SpatialDropout`` operator when one is requested.
    """

    def __init__(self, p, num_filters):
        """Remember the operator settings exactly as they were passed in."""
        # The base class is told that the operator needs the output gradient.
        super(SpatialDropoutProp, self).__init__(need_top_grad=True)
        self._num_filters = num_filters
        self._p = p

    def infer_shape(self, in_shapes):
        """Report that the single output has the same shape as the first input.

        Returns:
            A 3-tuple of input shapes, output shapes and auxiliary shapes
            (the last one empty).
        """
        shape = in_shapes[0]
        inputs = (shape,)
        outputs = (shape,)
        aux = ()
        return inputs, outputs, aux

    def create_operator(self, ctx, in_shape, in_dtypes):
        """Build the ``SpatialDropout`` operator for the given context."""
        op = SpatialDropout(self._p, self._num_filters, ctx)
        return op

def load_model(model_path, prefix, epoch=0):
    """Load a saved checkpoint and return a module bound for inference.

    Args:
        model_path: Directory holding the checkpoint files and the
            ``<prefix>-shapes.json`` file.
        prefix: Checkpoint file name prefix.
        epoch: Epoch number of the checkpoint to load.

    Returns:
        An ``mx.mod.Module`` with one input named ``'data'`` and no labels,
        bound with ``for_training=False`` and with the checkpoint parameters
        set.
    """
    checkpoint_prefix = os.path.join(model_path, prefix)
    symbol, args, auxs = mx.model.load_checkpoint(checkpoint_prefix, epoch)

    shapes_file = os.path.join(model_path, '%s-shapes.json' % prefix)
    with open(shapes_file) as handle:
        saved_shapes = json.load(handle)

    # Take the first recorded shape and replace its leading dimension with 1.
    input_shape = tuple([1] + saved_shapes[0]['shape'][1:])

    module = mx.mod.Module(symbol, data_names=('data',), label_names=None)
    module.bind(data_shapes=[['data', input_shape]], for_training=False)
    module.set_params(args, auxs, allow_missing=True)
    return module