# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. """CMSIS-NN functions for testing networks""" import platform import math import numpy as np import pytest from typing import List, Dict, Optional, Any, Union, Tuple import tvm from tvm import relay def skip_if_no_reference_system(func): return pytest.mark.skipif( platform.machine() == "i686", reason="Reference system unavailable in i386 container" )(func) def count_num_calls(mod): """Counts number of CallNode(s) in the IRModule""" class CallCounter(relay.ExprVisitor): def __init__(self): super().__init__() self.count = 0 def visit_call(self, call): if isinstance(call.op, tvm.ir.Op): self.count += 1 super().visit_call(call) counter = CallCounter() for var in mod.get_global_vars(): counter.visit(mod[var.name_hint]) return counter.count def get_range_for_dtype_str(dtype): """ Produces the min,max for a give data type. Parameters ---------- dtype : str a type string (e.g., int8) Returns ------- type_info.min : int the minimum of the range type_info.max : int the maximum of the range """ try: type_info = np.iinfo(dtype) except ValueError: type_info = np.finfo(dtype) return type_info.min, type_info.max def make_module(func): """Creates IRModule from Function""" func = relay.Function(relay.analysis.free_vars(func), func) mod = tvm.IRModule.from_expr(func) mod = relay.transform.InferType()(mod) return mod def get_same_padding(in_shape, kernel, dilation, stride): """ Provides CMSIS-NN padding when output dim == input dim. This is TFLu's "SAME" padding case. """ dilated_kernel_h = dilation[0] * (kernel[0] - 1) + 1 out = int(math.ceil(float(in_shape[0]) / float(stride[0]))) pad = max(0, (out - 1) * stride[0] + dilated_kernel_h - in_shape[0]) pad_top = pad // 2 pad_bottom = pad - pad_top dilated_kernel_w = dilation[1] * (kernel[1] - 1) + 1 out = int(math.ceil(float(in_shape[1]) / float(stride[1]))) pad = max(0, (out - 1) * stride[1] + dilated_kernel_w - in_shape[1]) pad_left = pad // 2 pad_right = pad - pad_left return [pad_top, pad_left, pad_bottom, pad_right] def get_conv2d_qnn_params( weight_shape: List[int], input_scale: float, input_zp: int, weights_scale: Union[float, List[float]], weights_zp: int, input_dtype: str = "int8", weights_dtype: str = "int8", output_dtype: str = "int8", is_depthwise: bool = False, ) -> Tuple[float, int]: """ Calculate the output quantization parameters for convolution based on the input and weights quantization paramters and the data types. Parameters ---------- weight_shape : List[int] shape of the weights input_scale : float scale of the input tensor input_zp : int zero point of the input tensor weights_scale : Union[float, List[float]] scale(s) of the weights tensor weights_zp : int zero point of the weights tensor is_depthwise : bool whether it is a depthwise convolution input_dtype : str data type of the input tensor weights_dtype : str data type of the weights tensor output_dtype : str data type of the output tensor Returns ------- output_scale : float scale of the output tensor output_zp : int zero point of the output tensor """ input_dtype_min, input_dtype_max = get_range_for_dtype_str(input_dtype) input_max = input_scale * (input_dtype_max - input_zp) input_min = input_scale * (input_dtype_min - input_zp) weights_dtype_min, weights_dtype_max = get_range_for_dtype_str(weights_dtype) weights_sc_max = np.max(weights_scale) weights_max = weights_sc_max * (weights_dtype_max - weights_zp) weights_sc_min = np.min(weights_scale) weights_min = weights_sc_min * (weights_dtype_min - weights_zp) weights_h = weight_shape[1] weights_w = weight_shape[2] channels = weight_shape[3] num_elements = weights_h * weights_w * channels # Adjust the result if it is a depthwise convolution if is_depthwise: num_elements = num_elements / channels # The smallest and largest possible values in the unquantized output tensor output_limits = [ weights_max * input_max * num_elements, weights_min * input_max * num_elements, weights_min * input_min * num_elements, weights_max * input_min * num_elements, ] output_max = max(output_limits) output_min = min(output_limits) output_dtype_min, output_dtype_max = get_range_for_dtype_str(output_dtype) output_scale = (output_max - output_min) / (output_dtype_max - output_dtype_min) output_zp = int(output_dtype_min - (output_min / output_scale)) return output_scale, output_zp def make_qnn_relu(expr, fused_activation_fn, scale, zero_point, dtype): """Mimics convert_qnn_fused_activation_function from TFLite frontend""" quantize = lambda x: float(int(round(x / scale)) + zero_point) # Get min/max of the output dtype. This will be used to ensure that clip a_min/a_max are not # beyond the dtype range. qmin, qmax = get_range_for_dtype_str(dtype) # The input expr is a quantized tensor with its scale and zero point. We calculate the # suitable clip off points based on these scale and zero point. if fused_activation_fn == "NONE": return expr if fused_activation_fn == "RELU6": return tvm.relay.op.clip(expr, a_min=max(qmin, quantize(0)), a_max=min(qmax, quantize(6.0))) if fused_activation_fn == "RELU_N1_TO_1": return tvm.relay.op.clip( expr, a_min=max(qmin, quantize(-1.0)), a_max=min(qmax, quantize(1.0)) ) if fused_activation_fn == "RELU": return tvm.relay.op.clip(expr, a_min=max(qmin, quantize(0.0)), a_max=qmax)