# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Arm Compute Library integration dense tests."""

import numpy as np

import tvm
from tvm import relay
from tvm import testing
from test_arm_compute_lib.infrastructure import (
    Device,
    skip_runtime_test,
    skip_codegen_test,
    build_and_run,
    verify,
    verify_codegen,
)


def _get_model(shape, weight_shape, units, dtype, var_names, has_bias=False):
    """Return a model and any parameters it may have"""
    a = relay.var(next(var_names), shape=shape, dtype=dtype)
    w = tvm.nd.array(np.random.uniform(-128, 127, weight_shape).astype(dtype))
    weights = relay.const(w, dtype)
    out = relay.nn.dense(a, weights, units=units, out_dtype=dtype)
    params = {"w": w}
    if has_bias:
        b = tvm.nd.array(np.random.randint(-128, 127, weight_shape[0]).astype(dtype))
        biasc = relay.const(b, dtype)
        out = relay.nn.bias_add(out, biasc)
        params["b"] = b
    return out, params


def _get_qnn_params(input_zp, input_sc, kernel_zp, kernel_sc, kernel_h, kernel_w):
    """Get output qnn parameters given input and kernel parameters."""
    input_max = input_sc * (255 - input_zp)
    input_min = -input_sc * input_zp
    kernel_max = kernel_sc * (255 - kernel_zp)
    kernel_min = -kernel_sc * kernel_zp
    output_limits = [
        kernel_max * kernel_h * kernel_w * input_max,
        kernel_min * kernel_h * kernel_w * input_max,
        kernel_min * kernel_h * kernel_w * input_min,
        kernel_max * kernel_h * kernel_w * input_min,
    ]
    output_max = max(output_limits)
    output_min = min(output_limits)
    output_sc = (output_max - output_min) / 255
    output_zp = -int(output_min / output_sc)
    return output_zp, output_sc


def _get_qnn_model(
    shape,
    weight_shape,
    units,
    dtype,
    input_zp,
    input_sc,
    kernel_zp,
    kernel_sc,
    output_zp,
    output_sc,
    var_names,
    has_bias=False,
):
    a = relay.var(next(var_names), shape=shape, dtype=dtype)
    w = tvm.nd.array(np.random.uniform(-128, 127, weight_shape).astype(dtype))
    weights = relay.const(w, dtype)
    out = relay.qnn.op.dense(
        a,
        weights,
        units=units,
        input_zero_point=relay.const(input_zp, "int32"),
        kernel_zero_point=relay.const(kernel_zp, "int32"),
        input_scale=relay.const(input_sc, "float32"),
        kernel_scale=relay.const(kernel_sc, "float32"),
        out_dtype="int32",
    )
    params = {"w": w}
    if has_bias:
        b = tvm.nd.array(np.random.randint(0, 255, weight_shape[0]).astype("int32"))
        biasc = relay.const(b, "int32")
        out = relay.nn.bias_add(out, biasc)
        params["b"] = b
    out = relay.qnn.op.requantize(
        out,
        relay.const(input_sc * kernel_sc, "float32"),  # input scale
        relay.const(0, "int32"),  # input zero point
        relay.const(output_sc, "float32"),  # output scale
        relay.const(output_zp, "int32"),  # output zero point
        out_dtype="uint8",
    )
    return out, params


def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False):
    output_shape = (shape[0], units)
    out_dtype = "int32" if dtype == "uint8" else "float32"

    node = {
        "op": "kernel",
        "name": "nn.dense",
        "inputs": [],
        "attrs": {
            "num_outputs": "1",
            "out_dtype": [[out_dtype]],
            "shape": [[list(output_shape)]],
            "dtype": [[dtype]],
            "units": [[str(units)]],
        },
    }

    inputs = [
        {"op": "input", "name": "", "attrs": {"shape": [[list(shape)]], "dtype": [[str(dtype)]]}},
        {
            "op": "const",
            "name": "",
            "attrs": {"shape": [[list(weight_shape)]], "dtype": [[str(dtype)]]},
        },
    ]

    # qnn.dense params, input and kernel
    if dtype == "uint8":
        node["name"] = "qnn.dense"
        for param_dtype in ["int32", "float32"]:
            for _ in range(2):
                inputs.append(
                    {
                        "op": "const",
                        "name": "",
                        "attrs": {"shape": [[[]]], "dtype": [[param_dtype]]},
                    }
                )

    if has_bias:
        bias_dtype = "int32" if dtype == "uint8" else "float32"
        bias_shape = (
            [1, weight_shape[0]]
            if dtype == "float32" and weight_shape[0] != 1
            else [weight_shape[0]]
        )
        inputs.append(
            {
                "op": "const",
                "name": "",
                "attrs": {"shape": [[bias_shape]], "dtype": [[bias_dtype]]},
            }
        )

    # qnn.dense params, output
    if dtype == "uint8":
        for param_dtype in ["float32", "int32"]:
            inputs.append(
                {"op": "const", "name": "", "attrs": {"shape": [[[]]], "dtype": [[param_dtype]]}}
            )

    input_idx = 0
    for _ in range(len(inputs)):
        node["inputs"].append([input_idx, 0, 0])
        input_idx += 1
    node["attrs"]["num_inputs"] = str(len(inputs))
    inputs.append(node)
    return inputs


def test_dense():
    Device.load("test_config.json")

    if skip_runtime_test():
        return

    device = Device()
    np.random.seed(0)
    dtype = "float32"
    trials = [
        [(1, 128), (16, 128), 16, True],
        [(1, 128), (16, 128), 16, False],
        [(32, 32), (32, 32), 32, True],
        [(32, 32), (32, 32), 32, False],
        [(1, 64), (1, 64), 1, True],
        [(1, 64), (1, 64), 1, False],
        [(11, 2), (2, 2), 2, True],
        [(11, 2), (2, 2), 2, False],
    ]
    for shape, weight_shape, units, composite in trials:
        outputs = []
        inputs = {"a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype))}
        func, params = _get_model(
            shape, weight_shape, units, dtype, var_names=iter(inputs), has_bias=composite
        )
        for acl in [False, True]:
            outputs.append(
                build_and_run(
                    func,
                    inputs,
                    1,
                    params,
                    device,
                    enable_acl=acl,
                )[0]
            )
        config = {
            "shape": shape,
            "weight_shape": weight_shape,
            "units": units,
            "dtype": dtype,
            "composite operators (bias)": composite,
        }
        verify(outputs, atol=0.001, rtol=0.01, config=config)


def test_codegen_dense():
    if skip_codegen_test():
        return

    np.random.seed(0)
    dtype = "float32"
    trials = [
        [(1, 128), (16, 128), 16, True],
        [(1, 128), (16, 128), 16, False],
        [(32, 32), (32, 32), 32, True],
        [(32, 32), (32, 32), 32, False],
        [(1, 64), (1, 64), 1, True],
        [(1, 64), (1, 64), 1, False],
        [(11, 2), (2, 2), 2, True],
        [(11, 2), (2, 2), 2, False],
    ]
    for shape, weight_shape, units, composite in trials:
        inputs = {"a"}

        args = (shape, weight_shape, units, dtype)

        func, params = _get_model(*args, var_names=iter(inputs), has_bias=composite)
        exp_codegen = _get_expected_codegen(*args, has_bias=composite)
        verify_codegen(func, exp_codegen)


def test_qnn_dense():
    Device.load("test_config.json")

    if skip_runtime_test():
        return

    device = Device()
    np.random.seed(0)

    dtype = "uint8"
    trials = [
        [(1, 2), (2, 2), 2, True],
        [(1, 2), (2, 2), 2, False],
        [(4, 4), (4, 4), 4, True],
        [(4, 4), (4, 4), 4, False],
        [(16, 16), (4, 16), 4, True],
        [(16, 16), (4, 16), 4, False],
        [(1, 128), (16, 128), 16, True],
        [(1, 128), (16, 128), 16, False],
        [(32, 32), (32, 32), 32, True],
        [(32, 32), (32, 32), 32, False],
        [(1, 64), (1, 64), 1, True],
        [(1, 64), (1, 64), 1, False],
    ]
    for shape, weight_shape, units, composite in trials:
        outputs = []
        inputs = {"a": tvm.nd.array(np.random.uniform(0, 255, shape).astype(dtype))}
        input_zp = 100
        input_sc = 0.5
        kernel_zp = 50
        kernel_sc = 0.03
        output_zp, output_sc = _get_qnn_params(
            input_zp, input_sc, kernel_zp, kernel_sc, weight_shape[0], weight_shape[1]
        )

        func, params = _get_qnn_model(
            shape,
            weight_shape,
            units,
            dtype,
            input_zp,
            input_sc,
            kernel_zp,
            kernel_sc,
            output_zp,
            output_sc,
            var_names=iter(inputs),
            has_bias=composite,
        )

        for acl in [False, True]:
            outputs.append(
                build_and_run(
                    func,
                    inputs,
                    1,
                    params,
                    device,
                    enable_acl=acl,
                )[0]
            )

        config = {
            "shape": shape,
            "weight_shape": weight_shape,
            "units": units,
            "dtype": dtype,
            "composite operators (bias)": composite,
            "input scale": input_sc,
            "input zero point": input_zp,
            "kernel scale": kernel_sc,
            "kernel zero point": kernel_zp,
            "output scale": output_sc,
            "output zero point": output_zp,
        }
        verify(outputs, atol=1, rtol=0, config=config, verify_saturation=True)


def test_codegen_qnn_dense():
    if skip_codegen_test():
        return

    np.random.seed(0)

    dtype = "uint8"
    trials = [
        [(1, 2), (2, 2), 2, True],
        [(1, 2), (2, 2), 2, False],
        [(4, 4), (4, 4), 4, True],
        [(4, 4), (4, 4), 4, False],
        [(16, 16), (4, 16), 4, True],
        [(16, 16), (4, 16), 4, False],
        [(1, 128), (16, 128), 16, True],
        [(1, 128), (16, 128), 16, False],
        [(32, 32), (32, 32), 32, True],
        [(32, 32), (32, 32), 32, False],
        [(1, 64), (1, 64), 1, True],
        [(1, 64), (1, 64), 1, False],
    ]
    for shape, weight_shape, units, composite in trials:
        inputs = {"a"}
        args = (shape, weight_shape, units, dtype)

        input_zp = 100
        input_sc = 0.5
        kernel_zp = 25
        kernel_sc = 0.03
        output_zp, output_sc = _get_qnn_params(
            input_zp, input_sc, kernel_zp, kernel_sc, weight_shape[0], weight_shape[1]
        )

        func, params = _get_qnn_model(
            *args,
            var_names=iter(inputs),
            input_zp=input_zp,
            input_sc=input_sc,
            kernel_zp=kernel_zp,
            kernel_sc=kernel_sc,
            output_zp=output_zp,
            output_sc=output_sc,
            has_bias=composite,
        )
        exp_codegen = _get_expected_codegen(*args, has_bias=composite)
        verify_codegen(func, exp_codegen)


if __name__ == "__main__":
    test_dense()
    test_qnn_dense()
    test_codegen_dense()
    test_codegen_qnn_dense()