# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import tvm
import tvm.testing
from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_executor
from tvm.relay.testing import run_infer_type


def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data, axis):
    # Build a single-op qnn.dequantize module, run it on LLVM/CPU, and check
    # that the float32 result matches the expected values exactly.
    shape = in_data.shape
    input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
    input_zero_point = relay.const(quant_args["in_zero_point"], "int32")
    input_scale = relay.const(quant_args["in_scale"], "float32")
    quantized_output = relay.qnn.op.dequantize(
        input_data, input_scale=input_scale, input_zero_point=input_zero_point, axis=axis
    )
    mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output)
    mod = tvm.IRModule.from_expr(mod)
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        rt_mod.set_input(input_data=in_data)
        rt_mod.set_input(**params)
        rt_mod.run()
        res = rt_mod.get_output(0).numpy()
        np.testing.assert_equal(res, verify_output_data)
        assert res.dtype == np.float32


def test_uint8_to_float32():
    data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]).astype("uint8").reshape((2, 5))
    output = (
        np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64])
        .astype("float32")
        .reshape((2, 5))
    )
    quant_args = {"in_zero_point": 127, "in_scale": 0.5}
    dequantize_test_driver(
        in_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output, axis=-1
    )


def test_int8_to_float32():
    data = (
        np.array([-128, -127, -126, -125, -124, 123, 124, 125, 126, 127])
        .astype("int8")
        .reshape((2, 5))
    )
    output = (
        np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64])
        .astype("float32")
        .reshape((2, 5))
    )
    quant_args = {"in_zero_point": -1, "in_scale": 0.5}
    dequantize_test_driver(
        in_dtype="int8", quant_args=quant_args, in_data=data, verify_output_data=output, axis=-1
    )


def test_scalar_int8_to_float32():
    data = np.array(-128).astype("int8")
    output = np.array(-63.5).astype("float32")
    quant_args = {"in_zero_point": -1, "in_scale": 0.5}
    dequantize_test_driver(
        in_dtype="int8", quant_args=quant_args, in_data=data, verify_output_data=output, axis=-1
    )


def test_int32_to_float32():
    data = np.array([113, 29, -1052]).astype("int32")
    output = np.array([0.6550452, 0.16810896, -6.098297]).astype("float32")
    quant_args = {"in_zero_point": 0, "in_scale": 0.0057968604}
    dequantize_test_driver(
        in_dtype="int32", quant_args=quant_args, in_data=data, verify_output_data=output, axis=-1
    )


def test_channelwise_axis_1():
    # Per-channel dequantization: after the transpose the channel dimension is
    # the last axis, so each column uses its own zero point and scale.
    data = np.transpose(
        np.array([0, 1, 2, 3, 4, 243, 247, 249, 250, 251]).astype("uint8").reshape((2, 5))
    )
    output = np.transpose(
        np.array([-63.5, -63, -62.5, -62, -61.5, 30, 31, 31.5, 31.75, 32])
        .astype("float32")
        .reshape((2, 5))
    )
    quant_args = {
        "in_zero_point": np.array([127, 123]).astype("int32"),
        "in_scale": np.array([0.5, 0.25]).astype("float32"),
    }
    dequantize_test_driver(
        in_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output, axis=-1
    )


def test_channelwise_axis_0():
    # Per-channel dequantization along the first axis: each row uses its own
    # zero point and scale.
    data = np.array([0, 1, 2, 3, 4, 243, 247, 249, 250, 251]).astype("uint8").reshape((2, 5))
    output = (
        np.array([-63.5, -63, -62.5, -62, -61.5, 30, 31, 31.5, 31.75, 32])
        .astype("float32")
        .reshape((2, 5))
    )
    quant_args = {
        "in_zero_point": np.array([127, 123]).astype("int32"),
        "in_scale": np.array([0.5, 0.25]).astype("float32"),
    }
    dequantize_test_driver(
        in_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output, axis=0
    )


def test_dynamic_dequantize():
    # Scale and zero point are expressions of runtime variables rather than
    # constants, so the op must type-check and compile without folding them.
    x = relay.var("x", shape=(1, 2, 3, 4), dtype="int8")
    scale_var = relay.var("scale", shape=(), dtype="float32")
    zp_var = relay.var("zp", shape=(), dtype="int32")

    deq_x = relay.qnn.op.dequantize(x, scale_var * scale_var, zp_var + zp_var)
    tt = run_infer_type(deq_x)

    assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "float32")
    func = relay.Function([x, scale_var, zp_var], deq_x)
    data = np.random.uniform(size=(1, 2, 3, 4)).astype("int8")
    scale = np.array(1).astype("float32")
    zp = np.array(0).astype("int32")

    mod = tvm.ir.IRModule.from_expr(func)

    for target, dev in tvm.testing.enabled_targets():
        # TODO: (electriclilies) enable AlterOpLayout when it is fixed
        with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
            lib = relay.build(mod, target=target)

        module = graph_executor.GraphModule(lib["default"](dev))
        module.set_input(**{"x": data, "scale": scale, "zp": zp})
        module.run()


if __name__ == "__main__":
    test_uint8_to_float32()
    test_int8_to_float32()
    test_scalar_int8_to_float32()
    test_int32_to_float32()
    test_channelwise_axis_1()
    test_channelwise_axis_0()
    test_dynamic_dequantize()