# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor
from tvm.relay.frontend.mxnet_qnn_op_utils import (
    dequantize_mxnet_min_max,
    quantize_mxnet_min_max,
    get_mkldnn_int8_scale,
    get_mkldnn_uint8_scale,
    quantize_conv_bias_mkldnn_from_var,
)


def test_mkldnn_dequantize():
    """Check dequantize_mxnet_min_max for uint8 and int8 inputs on the LLVM target.

    Every case wraps the op in a Relay function, builds it, runs it through the
    graph executor on CPU and compares the result with precomputed references.
    """

    def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
        """Build, execute and verify one dequantize case.

        Parameters
        ----------
        in_dtype : str
            dtype of the quantized input variable.
        quant_args : dict
            Provides the "min_range" and "max_range" values handed to the op.
        in_data : numpy.ndarray
            Quantized values fed to the compiled module.
        verify_output_data : numpy.ndarray
            Reference values the executor output is compared against.
        """
        data_var = relay.var("input_data", shape=in_data.shape, dtype=in_dtype)
        dequantized = dequantize_mxnet_min_max(
            data_var,
            min_range=quant_args["min_range"],
            max_range=quant_args["max_range"],
            in_dtype=in_dtype,
        )
        func = relay.Function(relay.analysis.free_vars(dequantized), dequantized)
        ir_mod = tvm.IRModule.from_expr(func)
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build(ir_mod, "llvm", params=None)
            executor = graph_executor.create(graph, lib, device=tvm.cpu(0))
            executor.set_input(input_data=in_data)
            executor.set_input(**params)
            executor.run()
            result = executor.get_output(0).numpy()
        assert np.allclose(result, verify_output_data)
        # The dequantized output must be float32 whatever the input dtype was.
        assert result.dtype == np.float32

    # Both cases use the same real-valued range.
    quant_range = {"min_range": -63.5, "max_range": 64}

    uint8_input = np.array(
        [0, 1, 2, 3, 4, 251, 252, 253, 254, 255],
        dtype="uint8",
    ).reshape(2, 5)
    uint8_expected = np.array(
        [
            0.0,
            0.25048923,
            0.50097847,
            0.7514677,
            1.0019569,
            62.8728,
            63.123287,
            63.373775,
            63.624268,
            63.874756,
        ],
        dtype="float32",
    ).reshape(2, 5)

    int8_input = np.array(
        [-126, -125, -124, -123, -122, 123, 124, 125, 126, 127],
        dtype="int8",
    ).reshape(2, 5)
    int8_expected = np.array(
        [
            -63.247063,
            -62.745102,
            -62.24314,
            -61.74118,
            -61.23922,
            61.74118,
            62.24314,
            62.745102,
            63.247063,
            63.749023,
        ],
        dtype="float32",
    ).reshape(2, 5)

    # Cases run in order: uint8 first, then int8.
    cases = (
        ("uint8", uint8_input, uint8_expected),
        ("int8", int8_input, int8_expected),
    )
    for dtype, quantized, expected in cases:
        dequantize_test_driver(
            in_dtype=dtype, quant_args=quant_range, in_data=quantized, verify_output_data=expected
        )


def test_mkldnn_quantize():
    """Check quantize_mxnet_min_max for uint8 and int8 outputs on the LLVM target.

    Every case wraps the op in a Relay function, builds it, runs it through the
    graph executor on CPU and compares the result with precomputed references.
    """

    def quantize_test_driver(out_dtype, quant_args, in_data, verify_output_data):
        """Build, execute and verify one quantize case.

        Parameters
        ----------
        out_dtype : str
            dtype requested for the quantized output.
        quant_args : dict
            Provides the "min_range" and "max_range" values handed to the op.
        in_data : numpy.ndarray
            float32 values fed to the compiled module.
        verify_output_data : numpy.ndarray
            Reference values; its dtype is also the expected output dtype.
        """
        data_var = relay.var("input_data", shape=in_data.shape, dtype="float32")
        # Only the first element of the returned triple is exercised here.
        quantized, _, _ = quantize_mxnet_min_max(
            data_var,
            min_range=quant_args["min_range"],
            max_range=quant_args["max_range"],
            out_dtype=out_dtype,
        )
        func = relay.Function(relay.analysis.free_vars(quantized), quantized)
        ir_mod = tvm.IRModule.from_expr(func)
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build(ir_mod, "llvm", params=None)
            executor = graph_executor.create(graph, lib, device=tvm.cpu(0))
            executor.set_input(input_data=in_data)
            executor.set_input(**params)
            executor.run()
            result = executor.get_output(0).numpy()
        assert np.allclose(result, verify_output_data)
        assert result.dtype == verify_output_data.dtype

    # Both cases use the same real-valued range.
    quant_range = {"min_range": -63.5, "max_range": 64}

    uint8_input = np.array(
        [
            0.0,
            0.25048923,
            0.50097847,
            0.7514677,
            1.0019569,
            62.8728,
            63.123287,
            63.373775,
            63.624268,
            63.874756,
        ],
        dtype="float32",
    ).reshape(2, 5)
    uint8_expected = np.array(
        [0, 1, 2, 3, 4, 251, 252, 253, 254, 255],
        dtype="uint8",
    ).reshape(2, 5)

    int8_input = np.array(
        [
            -63.247063,
            -62.745102,
            -62.24314,
            -61.74118,
            -61.23922,
            61.74118,
            62.24314,
            62.745102,
            63.247063,
            63.749023,
        ],
        dtype="float32",
    ).reshape(2, 5)
    int8_expected = np.array(
        [-126, -125, -124, -123, -122, 123, 124, 125, 126, 127],
        dtype="int8",
    ).reshape(2, 5)

    # Cases run in order: uint8 first, then int8.
    cases = (
        ("uint8", uint8_input, uint8_expected),
        ("int8", int8_input, int8_expected),
    )
    for dtype, real_values, expected in cases:
        quantize_test_driver(
            out_dtype=dtype, quant_args=quant_range, in_data=real_values, verify_output_data=expected
        )


def test_get_mkldnn_int8_scale():
    """Compare get_mkldnn_int8_scale with a reference value for a symmetric range."""
    # The range is symmetric around zero, so a single bound describes it.
    bound = 3.904039
    scale = get_mkldnn_int8_scale(range_min=-bound, range_max=bound)
    assert np.allclose(scale, 0.03061991354976495)


def test_get_mkldnn_uint8_scale():
    """Compare get_mkldnn_uint8_scale with a reference value for the range [0, 55.77269]."""
    reference = 0.21828841189047482
    scale = get_mkldnn_uint8_scale(range_min=0.0, range_max=55.77269)
    assert np.allclose(scale, reference)


def test_quantize_conv_bias_mkldnn_from_var():
    """Inspect the Relay call produced by quantize_conv_bias_mkldnn_from_var.

    Nothing is compiled or executed; only the structure of the returned
    expression (type, operator, attributes, scale argument) is checked.
    """
    scale_nd = tvm.nd.array(np.array([0.5, 0.6, 0.7]))
    bias = relay.var("bias", shape=(3,), dtype="float32")

    quantized = quantize_conv_bias_mkldnn_from_var(bias, scale_nd)

    assert isinstance(quantized, tvm.relay.expr.Call)
    call_attrs = quantized.attrs
    assert call_attrs.axis == 0
    assert call_attrs.out_dtype == "int32"
    assert quantized.op.name == "qnn.quantize"
    # The call's second argument must carry the scale array that was passed in.
    assert quantized.args[1].data == scale_nd


if __name__ == "__main__":
    # Run every test in this module, in definition order, when executed as a script.
    for _run_test in (
        test_mkldnn_dequantize,
        test_mkldnn_quantize,
        test_get_mkldnn_int8_scale,
        test_get_mkldnn_uint8_scale,
        test_quantize_conv_bias_mkldnn_from_var,
    ):
        _run_test()