# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import tvm
from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_executor
from tvm.relay.testing.temp_op_attr import TempOpAttr

# We use the llvm target for testing functionality. Plain `llvm` targets an
# older Intel generation, which legalizes qnn.dense to a simple lowering. The
# legalization is therefore overridden so that it is skipped, and the
# QNNCanonicalizeOps lowering is used for testing instead.
def legalize_qnn_dense(attrs, inputs, types):
    # Returning None leaves the op unchanged, so no legalization is applied.
    return None


def make_requantize_params(input_scale, output_scale, output_zero_point, out_dtype):
    config = {
        "input_scale": input_scale,
        "output_scale": output_scale,
        "output_zero_point": output_zero_point,
        "out_dtype": out_dtype,
    }
    return config


def make_configuration(
    quantized_data,
    quantized_kernel,
    dtype,
    input_shape,
    kernel_shape,
    input_zero_point,
    kernel_zero_point,
    input_scale,
    kernel_scale,
    units,
    output,
    out_dtype="int32",
    bias=None,
    requantize=None,
):
    if requantize is not None:
        assert bias is not None
    config = {
        "quantized_data": quantized_data,
        "quantized_kernel": quantized_kernel,
        "dtype": dtype,
        "input_shape": input_shape,
        "kernel_shape": kernel_shape,
        "input_zero_point": input_zero_point,
        "kernel_zero_point": kernel_zero_point,
        "input_scale": input_scale,
        "kernel_scale": kernel_scale,
        "units": units,
        "output": output,
        "out_dtype": out_dtype,
        "bias": bias,
        "requantize": requantize,
    }
    return config


def make_int_configuration(use_bias=False, requantize_output=False, per_channel=False):
    input_shape, kernel_shape, output_shape = (2, 10), (3, 10), (2, 3)
    input_zero_point, kernel_zero_point = -1, -1
    in_dtype = "int8"
    out_dtype = "int32" if not requantize_output else "int8"
    units = 3
    quantized_data_np = (
        np.array([1, 3, 5, 7, 9, 11, 13, 15, -19, -21, 1, 3, 5, 7, 9, 11, 13, -17, 17, -21])
        .astype(in_dtype)
        .reshape(input_shape)
    )
    quantized_kernel_np = (
        np.array(
            [
                1, 3, 5, 7, 9, 11, 13, 15, 17, 19,
                1, 3, 5, 7, 9, 11, 13, 15, 17, 19,
                1, 3, 5, 7, 9, 11, 13, 15, 17, 19,
            ]
        )
        .astype(in_dtype)
        .reshape(kernel_shape)
    )
    input_scale = 0.5
    kernel_scale = 0.5
    output_scale = 1.0
    bias = np.array([4, 8, 12]).astype(out_dtype).reshape((units,)) if use_bias else None

    if per_channel:
        assert use_bias and requantize_output
        kernel_scale = np.array([0.5, 0.3, 0.4], dtype=np.float32)
        output = np.array([23, 14, 20, 57, 34, 47])
    elif requantize_output:
        assert use_bias
        output = np.array([23, 24, 25, 57, 58, 59])
    elif use_bias:
        output = np.array([96, 100, 104, 232, 236, 240])
    else:
        output = np.array([92, 92, 92, 228, 228, 228])

    requant_params = (
        make_requantize_params(input_scale * kernel_scale, output_scale, -1, "int8")
        if requantize_output
        else None
    )

    output = output.astype(out_dtype).reshape(output_shape)
    return make_configuration(
        quantized_data=quantized_data_np,
        quantized_kernel=quantized_kernel_np,
        dtype=in_dtype,
        input_shape=input_shape,
        kernel_shape=kernel_shape,
        input_zero_point=input_zero_point,
        kernel_zero_point=kernel_zero_point,
        input_scale=input_scale,
        kernel_scale=kernel_scale,
        units=units,
        output=output,
        bias=bias,
        requantize=requant_params,
    )
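
# A minimal cross-check sketch (not part of the original test): the int32
# expectations above follow directly from the qnn.dense definition,
#     out = (data - input_zero_point) @ (kernel - kernel_zero_point)^T.
# For example, the first data row with zero point -1 shifts to
# [2, 4, 6, 8, 10, 12, 14, 16, -18, -20]; its dot product with the shifted
# kernel row [2, 4, ..., 20] is 92, matching the no-bias expectation. The
# helper name below is hypothetical and is not used by the tests.
def _reference_qnn_dense_int32(data, kernel, input_zero_point, kernel_zero_point):
    return np.matmul(
        data.astype("int32") - input_zero_point,
        (kernel.astype("int32") - kernel_zero_point).T,
    )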


def qnn_dense_driver(test_configuration):
    """Build a qnn.dense graph from a test configuration, canonicalize it,
    compile for llvm, run it, and check the output values and dtype."""
    in_dtype = test_configuration["dtype"]
    out_dtype = test_configuration["out_dtype"]
    quantized_data_name = "quantized_data"
    quantized_kernel_name = "quantized_kernel"
    expected_out_dtype = test_configuration["out_dtype"]
    bias_name = "bias"
    quantized_data = relay.var(
        quantized_data_name, shape=test_configuration["input_shape"], dtype=in_dtype
    )
    quantized_kernel = relay.var(
        quantized_kernel_name, shape=test_configuration["kernel_shape"], dtype=in_dtype
    )
    mod = relay.qnn.op.dense(
        quantized_data,
        quantized_kernel,
        relay.const(test_configuration["input_zero_point"], "int32"),
        relay.const(test_configuration["kernel_zero_point"], "int32"),
        relay.const(test_configuration["input_scale"], "float32"),
        relay.const(test_configuration["kernel_scale"], "float32"),
        test_configuration["units"],
    )
    if test_configuration[bias_name] is not None:
        bias = relay.var(bias_name, shape=test_configuration["bias"].shape, dtype=out_dtype)
        mod = relay.nn.bias_add(mod, bias)
    if test_configuration["requantize"] is not None:
        requantize_config = test_configuration["requantize"]
        mod = relay.qnn.op.requantize(
            mod,
            input_scale=relay.const(requantize_config["input_scale"], "float32"),
            input_zero_point=relay.const(0, "int32"),
            output_scale=relay.const(requantize_config["output_scale"], "float32"),
            output_zero_point=relay.const(requantize_config["output_zero_point"], "int32"),
            out_dtype=requantize_config["out_dtype"],
        )
        expected_out_dtype = requantize_config["out_dtype"]

    mod = relay.Function(relay.analysis.free_vars(mod), mod)
    mod = tvm.IRModule.from_expr(mod)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    with tvm.transform.PassContext(opt_level=2):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        mod.set_input(quantized_data_name, test_configuration[quantized_data_name])
        mod.set_input(quantized_kernel_name, test_configuration[quantized_kernel_name])
        if test_configuration[bias_name] is not None:
            mod.set_input(bias_name, test_configuration[bias_name])
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).numpy()
        np.testing.assert_equal(res, test_configuration["output"])
        assert res.dtype == expected_out_dtype


def test_qnn_dense_without_bias():
    with TempOpAttr("qnn.dense", "FTVMQnnLegalize", legalize_qnn_dense):
        int32_output_without_bias_params = make_int_configuration(use_bias=False)
        qnn_dense_driver(int32_output_without_bias_params)


def test_qnn_dense_with_bias():
    with TempOpAttr("qnn.dense", "FTVMQnnLegalize", legalize_qnn_dense):
        int32_output_with_bias_params = make_int_configuration(use_bias=True)
        qnn_dense_driver(int32_output_with_bias_params)


def test_qnn_dense_with_requantized_output():
    with TempOpAttr("qnn.dense", "FTVMQnnLegalize", legalize_qnn_dense):
        int8_requantized_output_with_bias_params = make_int_configuration(
            use_bias=True, requantize_output=True
        )
        qnn_dense_driver(int8_requantized_output_with_bias_params)


def test_per_channel_weight_scale():
    with TempOpAttr("qnn.dense", "FTVMQnnLegalize", legalize_qnn_dense):
        config = make_int_configuration(use_bias=True, requantize_output=True, per_channel=True)
        qnn_dense_driver(config)
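
# How the requantized expectations were derived (a sketch of the usual
# requantize arithmetic; round-to-nearest rounding is assumed, which is
# unambiguous here since none of the scaled values fall on a tie):
#     q_out = round(q_int32 * input_scale * kernel_scale / output_scale) + output_zero_point
# With input_scale * kernel_scale = 0.25, output_scale = 1.0, and zero point
# -1, the biased int32 row [96, 100, 104] maps to [23, 24, 25]; with the
# per-channel scales [0.5, 0.3, 0.4] it maps to [23, 14, 20]. The helper name
# below is hypothetical and is not used by the tests.
def _reference_requantize(q_int32, effective_scale, output_scale, output_zero_point):
    # effective_scale is input_scale * kernel_scale, a scalar or a per-channel
    # vector that broadcasts across the output rows.
    scaled = q_int32 * np.asarray(effective_scale) / output_scale
    return np.round(scaled).astype("int64") + output_zero_point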
== "__main__": test_qnn_dense_without_bias() test_qnn_dense_with_bias() test_qnn_dense_with_requantized_output() test_per_channel_weight_scale()