# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import tvm
from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_executor
from tvm.runtime.vm import VirtualMachine
from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE


def dequantize_test_driver(in_dtype, quant_args, axis, in_data):
    """Run the reference ``qnn.dequantize`` op on ``in_data`` and return its output.

    Parameters
    ----------
    in_dtype : str
        Dtype given to the relay input variable.
    quant_args : dict
        Provides the "in_zero_point" and "in_scale" values, which are
        embedded in the graph as relay constants.
    axis : int
        Axis forwarded to ``relay.qnn.op.dequantize``.
    in_data : numpy.ndarray
        Data fed to the compiled graph; its shape defines the input shape.

    Returns
    -------
    numpy.ndarray
        The first output of the graph executor.
    """
    data_var = relay.var("input_data", shape=in_data.shape, dtype=in_dtype)
    zero_point_const = relay.const(quant_args["in_zero_point"])
    scale_const = relay.const(quant_args["in_scale"])
    dequantized = relay.qnn.op.dequantize(
        data_var, input_scale=scale_const, input_zero_point=zero_point_const, axis=axis
    )

    # Wrap the expression in a function over its free variables and build for CPU.
    func = relay.Function(relay.analysis.free_vars(dequantized), dequantized)
    ir_mod = tvm.IRModule.from_expr(func)
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(ir_mod, "llvm", params=None)

    executor = graph_executor.create(graph, lib, device=tvm.cpu(0))
    executor.set_input(input_data=in_data)
    executor.set_input(**params)
    executor.run()
    return executor.get_output(0).numpy()


def build_simulated_dequantize(input_data, scale, zp, dtype, axis=-1):
    """Compile a ``simulated_dequantize`` expression and return a VM that runs it.

    Parameters
    ----------
    input_data, scale, zp
        Relay expressions passed positionally to
        ``relay.qnn.op.simulated_dequantize``.
    dtype
        Relay expression passed as the op's ``in_dtype`` argument.
    axis : int, optional
        Axis forwarded to the op. Defaults to -1.

    Returns
    -------
    VirtualMachine
        A VM on ``tvm.cpu(0)`` holding the executable compiled for "llvm".
    """
    expr = relay.qnn.op.simulated_dequantize(input_data, scale, zp, axis=axis, in_dtype=dtype)
    ir_mod = tvm.IRModule.from_expr(expr)
    with tvm.transform.PassContext(opt_level=3):
        executable = relay.vm.compile(ir_mod, "llvm", params=None)
    return VirtualMachine(executable, tvm.cpu(0))


def verify_simulated_dequantize_simple(dtype):
    """Check ``simulated_dequantize`` against ``qnn.dequantize`` with scalar params.

    Parameters
    ----------
    dtype : str
        Name of the integer dtype of the quantized input. It must be a key of
        ``SQNN_DTYPE_TO_CODE`` and accepted by ``np.iinfo``.

    Raises
    ------
    AssertionError
        If the simulated output differs from the reference output.
    """
    # Sample only values that are representable in ``dtype``. Casting
    # out-of-range floats (e.g. negatives to uint8) is undefined behaviour
    # and yields platform-dependent test data.
    limits = np.iinfo(dtype)
    low = max(int(limits.min), -128)
    high = min(int(limits.max), 255)
    data = np.random.randint(low, high + 1, size=[2, 5]).astype(dtype)
    data_fp = data.astype("float32")
    scale_np = np.float32(0.5)
    zp_np = np.int32(127)
    dtype_np = np.int32(SQNN_DTYPE_TO_CODE[dtype])

    # Reference result from the real dequantize op on the integer data.
    quant_args = {"in_zero_point": zp_np, "in_scale": scale_np}
    dq_out = dequantize_test_driver(
        in_dtype=dtype,
        quant_args=quant_args,
        axis=-1,
        in_data=data,
    )

    # The simulated op takes float data and receives the dtype code at runtime.
    # The relay variable gets its own name so the ``dtype`` argument is not shadowed.
    input_data = relay.var("input_data", shape=data.shape, dtype="float32")
    scale = relay.var("scale", shape=[])
    zp = relay.var("zp", shape=[], dtype="int32")
    dtype_var = relay.var("dtype", shape=[], dtype="int32")
    vm = build_simulated_dequantize(input_data, scale, zp, dtype_var)
    sim_dq_out = vm.invoke("main", input_data=data_fp, scale=scale_np, zp=zp_np, dtype=dtype_np)
    np.testing.assert_allclose(sim_dq_out.numpy(), dq_out, rtol=1e-5)


def test_simulated_dequantize():
    """Verify simulated dequantize for uint8, int8 and int32 inputs, in that order."""
    for quantized_dtype in ("uint8", "int8", "int32"):
        verify_simulated_dequantize_simple(quantized_dtype)


def test_dynamic_channels():
    """Compile simulated dequantize once and run it with scalar and per-channel params."""
    quantized = np.random.uniform(low=-64, high=64, size=[2, 5]).astype("int8")
    quantized_fp = quantized.astype("float32")
    dtype_code = np.int32(SQNN_DTYPE_TO_CODE["int8"])

    def reference(in_scale, in_zero_point):
        # Reference result from the real dequantize op along axis 0.
        return dequantize_test_driver(
            in_dtype="int8",
            quant_args={"in_zero_point": in_zero_point, "in_scale": in_scale},
            axis=0,
            in_data=quantized,
        )

    def check(machine, in_scale, in_zero_point, expected):
        # Run the compiled simulated op and compare against the reference.
        actual = machine.invoke(
            "main", input_data=quantized_fp, scale=in_scale, zp=in_zero_point, dtype=dtype_code
        )
        np.testing.assert_allclose(actual.numpy(), expected, rtol=1e-5)

    # Scalar qnn params: the reference takes plain scalars, while the simulated
    # op receives the same values as one-element arrays.
    scalar_scale = np.asarray([0.5]).astype("float32")
    scalar_zp = np.asarray([0]).astype("int32")
    expected_scalar = reference(scalar_scale[0], scalar_zp[0])

    # Declare scale and zero point with a dynamic length so that one compiled
    # module can be reused for both parameter layouts.
    data_var = relay.var("input_data", shape=quantized.shape, dtype="float32")
    scale_var = relay.var("scale", shape=[relay.Any()], dtype="float32")
    zp_var = relay.var("zp", shape=[relay.Any()], dtype="int32")
    dtype_var = relay.var("dtype", shape=[], dtype="int32")
    machine = build_simulated_dequantize(data_var, scale_var, zp_var, dtype_var, axis=0)
    check(machine, scalar_scale, scalar_zp, expected_scalar)

    # Per-channel params: compare against a fresh reference without recompiling
    # the simulated module.
    channel_scale = np.array([0.5, 0.25]).astype("float32")
    channel_zp = np.array([127, 123]).astype("int32")
    expected_channel = reference(channel_scale, channel_zp)
    check(machine, channel_scale, channel_zp, expected_channel)


def test_dynamic_dtype():
    """Compile simulated dequantize once and run it for uint8 and then int8 inputs.

    The dtype code is a runtime input of the compiled module, so the same VM
    is invoked for both dtypes and each result is compared against the
    reference ``qnn.dequantize`` output.

    Raises
    ------
    AssertionError
        If a simulated output differs from its reference output.
    """
    data = np.random.uniform(low=0, high=255, size=[2, 5]).astype("uint8")
    data_fp = data.astype("float32")
    # Test scalar uint8 to fp32.
    scale_np = np.asarray([0.5]).astype("float32")
    zp_np = np.asarray([127]).astype("int32")
    dtype_np = np.int32(SQNN_DTYPE_TO_CODE["uint8"])
    quant_args = {"in_zero_point": zp_np[0], "in_scale": scale_np[0]}
    dq_out = dequantize_test_driver(
        in_dtype="uint8",
        quant_args=quant_args,
        axis=-1,
        in_data=data,
    )
    # Declare scale and zero point with a dynamic length and run with
    # one-element inputs.
    input_data = relay.var("input_data", shape=data.shape, dtype="float32")
    scale = relay.var("scale", shape=[relay.Any()], dtype="float32")
    zp = relay.var("zp", shape=[relay.Any()], dtype="int32")
    dtype = relay.var("dtype", shape=[], dtype="int32")
    vm = build_simulated_dequantize(input_data, scale, zp, dtype)
    sim_dq_out = vm.invoke("main", input_data=data_fp, scale=scale_np, zp=zp_np, dtype=dtype_np)
    np.testing.assert_allclose(sim_dq_out.numpy(), dq_out, rtol=1e-5)

    # Now test int8 to float32. Sample inside the int8 range: casting floats
    # above 127 to int8 is undefined behaviour and gives platform-dependent data.
    data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype("int8")
    data_fp = data.astype("float32")
    # Get the reference dequantize output.
    dq_out = dequantize_test_driver(
        in_dtype="int8",
        quant_args=quant_args,
        axis=-1,
        in_data=data,
    )
    # Run the simulated dequantize without recompiling and confirm results match.
    dtype_np = np.int32(SQNN_DTYPE_TO_CODE["int8"])
    sim_dq_out = vm.invoke("main", input_data=data_fp, scale=scale_np, zp=zp_np, dtype=dtype_np)
    np.testing.assert_allclose(sim_dq_out.numpy(), dq_out, rtol=1e-5)


if __name__ == "__main__":
    # Allow running this file directly; the tests run in the listed order.
    for _test in (test_simulated_dequantize, test_dynamic_channels, test_dynamic_dtype):
        _test()