# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # pylint: disable=invalid-name, unused-argument import pytest pytest.importorskip("ethosu.vela") import numpy as np import tvm from tvm.tir import stmt_functor from tvm.script import tir as T from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator from tvm.relay.backend.contrib.ethosu import util import ethosu.vela.api as vapi # fmt: off """A sample tir test case for translator""" @tvm.script.ir_module class SingleEthosUConv2D: @T.prim_func def main(placeholder: T.handle, placeholder_1: T.handle, placeholder_2: T.handle, ethosu_conv2d: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_4 = T.match_buffer(placeholder_1, [1, 1, 3, 16], dtype="uint8", elem_offset=0, align=128, offset_factor=1) placeholder_5 = T.match_buffer(placeholder_2, [16], dtype="int32", elem_offset=0, align=128, offset_factor=1) placeholder_3 = T.match_buffer(placeholder, [1, 8, 8, 3], dtype="uint8", elem_offset=0, align=128, offset_factor=1) ethosu_conv2d_1 = T.match_buffer(ethosu_conv2d, [1, 8, 8, 16], dtype="uint8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 8, 8, 3, 8, 0, 8, T.load("uint8", placeholder_3.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "uint8", 8, 8, 16, 8, 0, 8, T.load("uint8", ethosu_conv2d_1.data, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 16, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_4.data, 0), 0, 12, T.load("uint8", placeholder_5.data, 0), 0, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="uint8")) # fmt: on # fmt: off """A sample tir test case with multiple convolutions for translator""" @tvm.script.ir_module class MultiEthosUConv2D: @T.prim_func def main(placeholder: T.handle, placeholder_1: T.handle, placeholder_2: T.handle, placeholder_3: T.handle, placeholder_4: T.handle, ethosu_conv2d: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_9 = T.match_buffer(placeholder_3, [1, 1, 32, 8], dtype="uint8", elem_offset=0, align=128, offset_factor=1) ethosu_conv2d_1 = T.match_buffer(ethosu_conv2d, [1, 8, 8, 8], dtype="uint8", elem_offset=0, align=128, offset_factor=1) placeholder_7 = T.match_buffer(placeholder_1, [1, 1, 3, 32], dtype="uint8", elem_offset=0, align=128, offset_factor=1) placeholder_6 = T.match_buffer(placeholder, [1, 8, 8, 3], dtype="uint8", elem_offset=0, align=128, offset_factor=1) placeholder_8 = T.match_buffer(placeholder_2, [32], dtype="int32", elem_offset=0, align=128, offset_factor=1) placeholder_5 = T.match_buffer(placeholder_4, [8], dtype="int32", elem_offset=0, align=128, offset_factor=1) # body ethosu_conv2d_2 = T.allocate([1024], "uint8", "global") ethosu_conv2d_3 = T.allocate([2048], "uint8", "global") T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 4, 8, 3, 4, 0, 8, T.load("uint8", placeholder_6.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "uint8", 4, 8, 32, 4, 0, 8, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_7.data, 0), 0, 12, T.load("uint8", placeholder_8.data, 0), 0, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="uint8")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 4, 8, 32, 4, 0, 8, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "uint8", 4, 8, 8, 4, 0, 8, T.load("uint8", ethosu_conv2d_1.data, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_9.data, 0), 0, 12, T.load("uint8", placeholder_5.data, 0), 0, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="uint8")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 4, 8, 3, 4, 0, 8, T.load("uint8", placeholder_6.data, 96), 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "uint8", 4, 8, 32, 4, 0, 8, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_7.data, 0), 0, 12, T.load("uint8", placeholder_8.data, 0), 0, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="uint8")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 4, 8, 32, 4, 0, 8, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "uint8", 4, 8, 8, 4, 0, 8, T.load("uint8", ethosu_conv2d_1.data, 256), 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_9.data, 0), 0, 12, T.load("uint8", placeholder_5.data, 0), 0, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="uint8")) # fmt: on # fmt: off """A sample tir test case with copy operations for translator""" @tvm.script.ir_module class MultiEthosUCopy: @T.prim_func def main(placeholder: T.handle, placeholder_1: T.handle, placeholder_2: T.handle, ethosu_conv2d: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_3 = T.match_buffer(placeholder, [1, 16, 16, 32], dtype="uint8", elem_offset=0, align=128, offset_factor=1) ethosu_conv2d_1 = T.match_buffer(ethosu_conv2d, [1, 16, 16, 8], dtype="uint8", elem_offset=0, align=128, offset_factor=1) placeholder_5 = T.match_buffer(placeholder_2, [8], dtype="int32", elem_offset=0, align=128, offset_factor=1) placeholder_4 = T.match_buffer(placeholder_1, [8, 1, 1, 32], dtype="uint8", elem_offset=0, align=128, offset_factor=1) # body placeholder_global = T.allocate([256], "uint8", "global") placeholder_d_global = T.allocate([8], "int32", "global") T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", placeholder_4.data, 0), 256, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("int32", placeholder_5.data, 0), 8, T.load("int32", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 32, 16, 0, 16, T.load("uint8", placeholder_3.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "uint8", 16, 16, 8, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 0, 12, T.load("uint8", placeholder_d_global, 0), 0, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) # fmt: on # fmt: off """A TIR test module of weight streaming""" @tvm.script.ir_module class WeightStreamOnly: @T.prim_func def main(placeholder: T.handle, ethosu_conv2d: T.handle, placeholder_1: T.handle, placeholder_2: T.handle, placeholder_3: T.handle, placeholder_4: T.handle, placeholder_5: T.handle, placeholder_6: T.handle, placeholder_7: T.handle, placeholder_8: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) buffer_4 = T.match_buffer(placeholder_5, [144], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_3 = T.match_buffer(placeholder_4, [20], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_7 = T.match_buffer(placeholder_7, [144], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_5 = T.match_buffer(placeholder_1, [144], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_6 = T.match_buffer(placeholder_6, [20], dtype="uint8", elem_offset=0, align=128, offset_factor=1) ethosu_conv2d_1 = T.match_buffer(ethosu_conv2d, [1, 16, 16, 8], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_1 = T.match_buffer(placeholder_3, [144], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_2 = T.match_buffer(placeholder_2, [20], dtype="uint8", elem_offset=0, align=128, offset_factor=1) placeholder_9 = T.match_buffer(placeholder, [1, 16, 16, 32], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer = T.match_buffer(placeholder_8, [20], dtype="uint8", elem_offset=0, align=128, offset_factor=1) # body placeholder_global = T.allocate([144], "uint8", "global") placeholder_d_global = T.allocate([20], "uint8", "global") T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_5.data, 0), 144, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_2.data, 0), 20, T.load("uint8", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 32, 16, 0, 16, T.load("uint8", placeholder_9.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "uint8", 16, 16, 2, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 144, 12, T.load("uint8", placeholder_d_global, 0), 20, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_1.data, 0), 144, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_3.data, 0), 20, T.load("uint8", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 32, 16, 0, 16, T.load("uint8", placeholder_9.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "uint8", 16, 16, 2, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 2), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 144, 12, T.load("uint8", placeholder_d_global, 0), 20, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_4.data, 0), 144, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_6.data, 0), 20, T.load("uint8", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 32, 16, 0, 16, T.load("uint8", placeholder_9.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "uint8", 16, 16, 2, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 4), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 144, 12, T.load("uint8", placeholder_d_global, 0), 20, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_7.data, 0), 144, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer.data, 0), 20, T.load("uint8", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 32, 16, 0, 16, T.load("uint8", placeholder_9.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "uint8", 16, 16, 2, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 6), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 144, 12, T.load("uint8", placeholder_d_global, 0), 20, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) __tvm_meta__ = None # fmt: on # fmt: off """A TIR test module of weight streaming and direct reading""" @tvm.script.ir_module class MixedRead: @T.prim_func def main(placeholder: T.handle, placeholder_1: T.handle, ethosu_conv2d: T.handle, placeholder_2: T.handle, placeholder_3: T.handle, placeholder_4: T.handle, placeholder_5: T.handle, placeholder_6: T.handle, placeholder_7: T.handle, placeholder_8: T.handle, placeholder_9: T.handle, placeholder_10: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) buffer_5 = T.match_buffer(placeholder_1, [592], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_7 = T.match_buffer(placeholder_2, [160], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_3 = T.match_buffer(placeholder_7, [80], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_6 = T.match_buffer(placeholder_4, [20], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_9 = T.match_buffer(placeholder_5, [80], dtype="uint8", elem_offset=0, align=128, offset_factor=1) ethosu_conv2d_1 = T.match_buffer(ethosu_conv2d, [1, 16, 16, 8], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer = T.match_buffer(placeholder_8, [20], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_1 = T.match_buffer(placeholder_10, [20], dtype="uint8", elem_offset=0, align=128, offset_factor=1) placeholder_11 = T.match_buffer(placeholder, [1, 16, 16, 32], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_2 = T.match_buffer(placeholder_6, [20], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_4 = T.match_buffer(placeholder_3, [80], dtype="uint8", elem_offset=0, align=128, offset_factor=1) buffer_8 = T.match_buffer(placeholder_9, [80], dtype="uint8", elem_offset=0, align=128, offset_factor=1) # body ethosu_conv2d_2 = T.allocate([4096], "uint8", "global") placeholder_global = T.allocate([80], "uint8", "global") placeholder_d_global = T.allocate([20], "uint8", "global") T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 32, 16, 0, 16, T.load("uint8", placeholder_11.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "uint8", 16, 16, 16, 16, 0, 16, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", buffer_5.data, 0), 592, 12, T.load("uint8", buffer_7.data, 0), 160, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_4.data, 0), 80, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_6.data, 0), 20, T.load("uint8", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 16, 16, 0, 16, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "uint8", 16, 16, 2, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 80, 12, T.load("uint8", placeholder_d_global, 0), 20, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_9.data, 0), 80, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_2.data, 0), 20, T.load("uint8", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 16, 16, 0, 16, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "uint8", 16, 16, 2, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 2), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 80, 12, T.load("uint8", placeholder_d_global, 0), 20, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_3.data, 0), 80, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer.data, 0), 20, T.load("uint8", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 16, 16, 0, 16, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "uint8", 16, 16, 2, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 4), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 80, 12, T.load("uint8", placeholder_d_global, 0), 20, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_8.data, 0), 80, T.load("uint8", placeholder_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_1.data, 0), 20, T.load("uint8", placeholder_d_global, 0), dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 16, 16, 16, 16, 0, 16, T.load("uint8", ethosu_conv2d_2, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "uint8", 16, 16, 2, 16, 0, 16, T.load("uint8", ethosu_conv2d_1.data, 6), 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 80, 12, T.load("uint8", placeholder_d_global, 0), 20, 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", dtype="handle")) __tvm_meta__ = None # fmt: on def test_buffer_info_extraction(): test_cases = [ { # Stimulus "tir_module": SingleEthosUConv2D, "param_dict": { 1: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 16], "uint8" ), 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [16], "int32"), }, # Reference Outputs "constants": { "placeholder_4": 1, "placeholder_5": 2, }, "data_buffers": { "placeholder_3": ( [1, 8, 8, 3], "uint8", tir_to_cs_translator.BufferType.input_or_output, ), "ethosu_conv2d_1": ( [1, 8, 8, 16], "uint8", tir_to_cs_translator.BufferType.input_or_output, ), }, }, { "tir_module": MultiEthosUConv2D, "param_dict": { 1: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 32], "uint8" ), 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [32], "int32"), 3: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 32, 8], "uint8" ), 4: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32"), }, # Reference Outputs "constants": { "placeholder_5": 4, "placeholder_7": 1, "placeholder_8": 2, "placeholder_9": 3, }, "data_buffers": { "placeholder_6": ( [1, 8, 8, 3], "uint8", tir_to_cs_translator.BufferType.input_or_output, ), "ethosu_conv2d_1": ( [1, 8, 8, 8], "uint8", tir_to_cs_translator.BufferType.input_or_output, ), "ethosu_conv2d_2": ([1024], "uint8", tir_to_cs_translator.BufferType.scratch), "ethosu_conv2d_3": ([2048], "uint8", tir_to_cs_translator.BufferType.scratch), }, }, ] for test_case in test_cases: # With Target Hooks the TIR module needs a target attached # and lowered via make unpacked API. tir_mod = test_case["tir_module"] tir_mod["main"] = tir_mod["main"].with_attr("target", tvm.target.Target("ethos-u")) tir_mod = tvm.tir.transform.MakeUnpackedAPI()(tir_mod) buffer_info = tir_to_cs_translator.extract_buffer_info(tir_mod, test_case["param_dict"]) for buffer_var, info in buffer_info.items(): buffer_name = buffer_var.name if buffer_name in test_case["constants"].keys(): assert ( info.values == test_case["param_dict"][test_case["constants"][buffer_name]] ).all() assert ( info.dtype == test_case["param_dict"][test_case["constants"][buffer_name]].dtype ) info.btype == tir_to_cs_translator.BufferType.constant else: assert info.btype == test_case["data_buffers"][buffer_name][2] def test_translate_ethosu_conv2d(): test_cases = [ { # Stimulus "tir_module": SingleEthosUConv2D, "param_dict": { 1: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 16], "uint8" ), 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [16], "int32"), }, # Reference outputs "ref": [ { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(8, 8, 3), "tiles": vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(24, 3, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(8, 8, 16), "tiles": vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(128, 16, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": { "op": vapi.NpuActivationOp.NONE_OR_RELU, "min": -3.5, "max": 60.25, }, "rounding_mode": vapi.NpuRoundingMode.TFL, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, } ], }, { "tir_module": MultiEthosUConv2D, "param_dict": { 1: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 32], "uint8" ), 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [32], "int32"), 3: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 32, 8], "uint8" ), 4: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32"), }, # Reference Outputs "ref": [ { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 3), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(24, 3, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 32), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(256, 32, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": {"op": None}, "rounding_mode": vapi.NpuRoundingMode.TFL, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, }, { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 32), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(256, 32, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 8), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(64, 8, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": { "op": vapi.NpuActivationOp.NONE_OR_RELU, "min": -3.5, "max": 60.25, }, "rounding_mode": vapi.NpuRoundingMode.TFL, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, }, { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 3), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(24, 3, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 32), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(256, 32, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": { "op": vapi.NpuActivationOp.NONE_OR_RELU, "min": -3.5, "max": 60.25, }, "rounding_mode": vapi.NpuRoundingMode.TFL, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, }, { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 32), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(256, 32, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 8), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(64, 8, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": { "op": vapi.NpuActivationOp.NONE_OR_RELU, "min": -3.5, "max": 60.25, }, "rounding_mode": vapi.NpuRoundingMode.TFL, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, }, ], }, ] def extract_ethosu_conv2d_extern_calls(mod): """This function will obtain all ethosu_conv2d calls from a NPU TIR module Parameters ---------- mod : tvm.IRModule This is a NPU TIR Module Returns ------- list of tvm.tir.Call objects that are tir extern calls for ethosu_conv2d """ # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] ethosu_conv2d_calls = list() def populate_ethosu_conv2d_calls(stmt): if ( isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern" and stmt.args[0] == "ethosu_conv2d" ): ethosu_conv2d_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_ethosu_conv2d_calls) return ethosu_conv2d_calls for test_case in test_cases: ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_case["tir_module"]) for idx, ethosu_conv2d_call in enumerate(ethosu_conv2d_calls): ref = test_case["ref"][idx] npu_op, w_zero_point = tir_to_cs_translator.translate_ethosu_conv2d(ethosu_conv2d_call) # Compare IFM assert npu_op.ifm.data_type == ref["ifm"]["data_type"] assert npu_op.ifm.shape == ref["ifm"]["shape"] assert npu_op.ifm.tiles.height_0 == ref["ifm"]["tiles"].height_0 assert npu_op.ifm.tiles.height_1 == ref["ifm"]["tiles"].height_1 assert npu_op.ifm.tiles.width_0 == ref["ifm"]["tiles"].width_0 assert npu_op.ifm.quantization == ref["ifm"]["quantization"] assert npu_op.ifm.layout == ref["ifm"]["layout"] assert npu_op.ifm.strides == ref["ifm"]["strides"] # Compare OFM assert npu_op.ofm.data_type == ref["ofm"]["data_type"] assert npu_op.ofm.shape == ref["ofm"]["shape"] assert npu_op.ofm.tiles.height_0 == ref["ofm"]["tiles"].height_0 assert npu_op.ofm.tiles.height_1 == ref["ofm"]["tiles"].height_1 assert npu_op.ofm.tiles.width_0 == ref["ofm"]["tiles"].width_0 assert npu_op.ofm.quantization == ref["ofm"]["quantization"] assert npu_op.ofm.layout == ref["ofm"]["layout"] assert npu_op.ofm.strides == ref["ofm"]["strides"] # Compare kernel and padding assert npu_op.kernel.__dict__ == ref["kernel"].__dict__ assert npu_op.padding == ref["padding"] # Compare activation if ref["activation"]["op"] is None: assert npu_op.activation is None else: assert npu_op.activation.op_type == ref["activation"]["op"] assert npu_op.activation.min == ref["activation"]["min"] assert npu_op.activation.max == ref["activation"]["max"] # Compare rounding mode assert npu_op.rounding_mode == ref["rounding_mode"] # Compare ifm upscaling assert npu_op.ifm_upscale == ref["ifm_upscale"] # Compare weight quantization parameters assert w_zero_point == ref["w_zero_point"] # fmt: off """A ethosu_depthwise_conv2d tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuDepthwiseConv2D: @T.prim_func def main(placeholder: T.handle, placeholder_1: T.handle, placeholder_2: T.handle, ethosu_depthwise_conv2d: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_4 = T.match_buffer(placeholder_1, [3, 3, 2, 1], dtype="int8", elem_offset=0, align=128, offset_factor=1) placeholder_5 = T.match_buffer(placeholder_2, [3, 10], dtype="uint8", elem_offset=0, align=128, offset_factor=1) placeholder_3 = T.match_buffer(placeholder, [1, 8, 8, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_depthwise_conv2d_1 = T.match_buffer(ethosu_depthwise_conv2d, [1, 6, 7, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 8, 8, 3, 8, 0, 8, T.load("int8", placeholder_3.data, 0), 0, 0, 0, T.float32(0.6), 11, "NHWC", 24, 3, 1, "int8", 6, 7, 3, 6, 0, 7, T.load("int8", ethosu_depthwise_conv2d_1.data, 0), 0, 0, 0, T.float32(0.26), 15, "NHWC", 21, 3, 1, 2, 3, 1, 1, 1, 1, T.load("int8", placeholder_4.data, 0), 18, 13, T.load("uint8", placeholder_5.data, 0), 30, 0, 0, 0, 0, "CLIP", 15, 105, "TFL", "NONE", dtype="int8")) __tvm_meta__ = None # fmt: on def test_translate_ethosu_depthwise_conv2d(): def extract_ethosu_depthwise_conv2d_extern_call(mod): # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] ethosu_depthwise_conv2d_calls = list() def populate_ethosu_depthwise_conv2d_calls(stmt): if ( isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern" and stmt.args[0] == "ethosu_depthwise_conv2d" ): ethosu_depthwise_conv2d_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_ethosu_depthwise_conv2d_calls) return ethosu_depthwise_conv2d_calls[0] depthwise_conv2d_call = extract_ethosu_depthwise_conv2d_extern_call(SingleEthosuDepthwiseConv2D) npu_op, w_zero_point = tir_to_cs_translator.translate_ethosu_depthwise_conv2d( depthwise_conv2d_call ) assert npu_op.ifm.data_type == vapi.NpuDataType.INT8 assert npu_op.ifm.shape == vapi.NpuShape3D(8, 8, 3) assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).height_0 assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).height_1 assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).width_0 assert npu_op.ifm.quantization == pytest.approx(vapi.NpuQuantization(0.6, 11)) assert npu_op.ifm.layout == vapi.NpuLayout.NHWC assert npu_op.ifm.strides == vapi.NpuShape3D(24, 3, 1) # Compare OFM assert npu_op.ofm.data_type == vapi.NpuDataType.INT8 assert npu_op.ofm.shape == vapi.NpuShape3D(6, 7, 3) assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(6, 0, 8, [0, 0, 0, 0]).height_0 assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(6, 0, 7, [0, 0, 0, 0]).height_1 assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(6, 0, 7, [0, 0, 0, 0]).width_0 assert npu_op.ofm.quantization == pytest.approx(vapi.NpuQuantization(0.26, 15)) assert npu_op.ofm.layout == vapi.NpuLayout.NHWC assert npu_op.ofm.strides == vapi.NpuShape3D(21, 3, 1) # Compare kernel and padding assert ( npu_op.kernel.__dict__ == vapi.NpuKernel(w=2, h=3, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1).__dict__ ) assert npu_op.padding == vapi.NpuPadding(top=0, left=0, bottom=0, right=0) # Compare activation assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU assert npu_op.activation.min == 0 assert npu_op.activation.max == pytest.approx(23.4) # Compare rounding mode assert npu_op.rounding_mode == vapi.NpuRoundingMode.TFL # Compare ifm upscaling assert npu_op.ifm_upscale == vapi.NpuResamplingMode.NONE # Compare weight quantization parameters assert w_zero_point == 13 def test_translate_ethosu_copy(): def extract_ethosu_copy_extern_calls(mod): """This function will obtain all ethosu_conv2d calls from a NPU TIR module Parameters ---------- mod : tvm.IRModule This is a NPU TIR Module Returns ------- list of tvm.tir.Call objects that are tir extern calls for ethosu_conv2d """ # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] ethosu_copy_calls = list() def populate_ethosu_copy_calls(stmt): if ( isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern" and stmt.args[0] == "ethosu_copy" ): ethosu_copy_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_ethosu_copy_calls) return ethosu_copy_calls test_cases = [ { # Stimulus "tir_module": MultiEthosUCopy, "param_dict": { 1: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [8, 1, 1, 32], "uint8" ), 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32"), }, # Reference outputs "ref": [ { "src": "placeholder_4", "dest": "placeholder_global", "length": 256, }, { "src": "placeholder_5", "dest": "placeholder_d_global", "length": 8, }, ], }, ] for test_case in test_cases: ethosu_copy_calls = extract_ethosu_copy_extern_calls(test_case["tir_module"]) for idx, ethosu_copy_call in enumerate(ethosu_copy_calls): npu_dma_op = tir_to_cs_translator.translate_ethosu_tir_call_extern(ethosu_copy_call) assert npu_dma_op.src.address.buffer_var.name == test_case["ref"][idx]["src"] assert npu_dma_op.dest.address.buffer_var.name == test_case["ref"][idx]["dest"] assert npu_dma_op.src.length == test_case["ref"][idx]["length"] assert npu_dma_op.dest.length == test_case["ref"][idx]["length"] # fmt: off @tvm.script.ir_module class MixedConstantDatatypes: @T.prim_func def main(placeholder: T.handle, placeholder_1: T.handle, placeholder_2: T.handle, ethosu_write: T.handle, placeholder_3: T.handle) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) placeholder_4 = T.match_buffer(placeholder, [1, 8, 16, 16], dtype="int8") buffer = T.match_buffer(placeholder_1, [160], dtype="uint8") placeholder_5 = T.match_buffer(placeholder_2, [1, 1, 1, 1], dtype="int16") ethosu_write_1 = T.match_buffer(ethosu_write, [1, 1, 1, 16], dtype="int8") buffer_1 = T.match_buffer(placeholder_3, [272], dtype="uint8") # body placeholder_global = T.allocate([272], "uint8", "global") placeholder_d_global = T.allocate([160], "uint8", "global") ethosu_write_2 = T.allocate([16], "int16", "global") placeholder_d_global_1 = T.allocate([1], "int16", "global") T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer_1.data, 0), 272, T.load("uint8", placeholder_global, 0), dtype="uint8")) T.evaluate(T.call_extern("ethosu_copy", T.load("uint8", buffer.data, 0), 160, T.load("uint8", placeholder_d_global, 0), dtype="uint8")) T.evaluate(T.call_extern("ethosu_depthwise_conv2d", "int8", 8, 16, 16, 8, 0, 16, T.load("int8", placeholder_4.data, 0), 0, 0, 0, T.float32(0.0039215548895299435), -128, "NHWC", 256, 16, 1, "int16", 1, 1, 16, 1, 0, 1, T.load("int16", ethosu_write_2, 0), 0, 0, 0, T.float32(0.0023205536417663097), -128, "NHWC", 1, 1, 1, 16, 8, 1, 1, 1, 1, T.load("uint8", placeholder_global, 0), 272, 0, T.load("uint8", placeholder_d_global, 0), 160, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="int16")) T.evaluate(T.call_extern("ethosu_copy", T.load("int16", placeholder_5.data, 0), 1, T.load("int16", placeholder_d_global_1, 0), dtype="int16")) T.evaluate(T.call_extern("ethosu_binary_elementwise", "int16", 1, 1, 16, 1, 0, 1, T.load("int16", ethosu_write_2, 0), 0, 0, 0, T.float32(0.0023205536417663097), -128, "NHWC", 1, 1, 1, "int16", 1, 1, 1, 1, 0, 1, T.load("int16", placeholder_d_global_1, 0), 0, 0, 0, T.float32(0.0078125018482064768), 0, "NHWC", 1, 1, 1, "int8", 1, 1, 16, 1, 0, 1, T.load("int8", ethosu_write_1.data, 0), 0, 0, 0, T.float32(0.0023205536417663097), -128, "NHWC", 1, 1, 1, "MUL", 0, "NONE", 0, 0, "NATURAL", dtype="int8")) # fmt: on def test_assign_addresses(): test_cases = [ { # Stimulus "tir_module": WeightStreamOnly, "param_dict": { 2: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [144], "uint8"), 3: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"), 4: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [144], "uint8"), 5: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"), 6: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [144], "uint8"), 7: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"), 8: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [144], "uint8"), 9: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"), }, }, { # Stimulus "tir_module": MixedRead, "param_dict": { 1: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [592], "uint8"), 3: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [160], "uint8"), 4: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8"), 5: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"), 6: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8"), 7: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"), 8: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8"), 9: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"), 10: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [80], "uint8"), 11: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [20], "uint8"), }, }, { # Stimulus "tir_module": MixedConstantDatatypes, "param_dict": { 1: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [160], "uint8"), 2: np.random.randint(np.iinfo("int16").min, np.iinfo("int16").max, [1], "int16"), 4: np.random.randint(np.iinfo("uint8").min, np.iinfo("uint8").max, [272], "uint8"), }, }, ] def extract_call_extern_list(mod): """This function will obtain all ethosu_conv2d calls from a NPU TIR module Parameters ---------- mod : tvm.IRModule This is a NPU TIR Module Returns ------- list of tvm.tir.Call objects that are tir extern calls for ethosu_conv2d """ # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] extern_calls = list() def populate_extern_calls(stmt): if isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern": extern_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_extern_calls) return extern_calls def collect_tir_buffer_info(npu_ops): """This is run prior to address assigning to collect tir buffer information for verification later on""" _npu_op_tir_buffers = dict() for npu_op in npu_ops: if isinstance(npu_op, vapi.NpuDmaOperation): _npu_op_tir_buffers[npu_op] = (npu_op.src.address, npu_op.dest.address) elif issubclass(type(npu_op), vapi.NpuBlockOperation): _npu_op_tir_buffers[npu_op] = ( npu_op.ifm.tiles.addresses[0], npu_op.ofm.tiles.addresses[0], npu_op.weights, npu_op.biases, ) return _npu_op_tir_buffers def _check_buffer(address, region, length, buffer_var): """Checks whether the buffer information is valid with original tir buffers. - If its constant, this will check the slice in the constant tensor has the values. - If its scratch, this will check the slice is within scratch and does not have conflicts with other scratch tensors. - If its input/output, this will check the address is zero """ inverse_region_map = { 0: tir_to_cs_translator.BufferType.constant, 1: tir_to_cs_translator.BufferType.scratch, 3: tir_to_cs_translator.BufferType.input, 4: tir_to_cs_translator.BufferType.output, } buffer_type = inverse_region_map[region] buffer_dtype = buffer_var.type_annotation.element_type.dtype dtype_bytes = np.iinfo(np.dtype(buffer_dtype)).bits // 8 if buffer_type == tir_to_cs_translator.BufferType.constant: ref = buffer_info[buffer_var].values hex_from = address * dtype_bytes * 2 hex_to = hex_from + length * dtype_bytes * 2 constant_hex = constant_hex_string[hex_from:hex_to] constant_tensor = np.frombuffer(bytearray.fromhex(constant_hex), dtype=buffer_dtype) np.array_equal(constant_tensor, ref) # Every buffer is adjusted to align to 16 bytes length = util.round_up(length, 16) # Mark these constants are read at least once constant_tensor_read_mask[address : address + length] = np.ones( length, dtype=buffer_dtype ) elif buffer_type == tir_to_cs_translator.BufferType.scratch: shape = list(buffer_info[buffer_var].shape) assert length == np.prod(shape) assert address < scratch_size size_in_bytes = int(np.prod(shape)) * dtype_bytes # Every buffer is adjusted to align to 16 bytes size_in_bytes = util.round_up(size_in_bytes, 16) assert address + size_in_bytes <= scratch_size # The scratch area should not be used by anyother buffer assert not scratch_allocation_mask[address : address + size_in_bytes].any() # The scratch area is marked as used scratch_allocation_mask[address : address + size_in_bytes] = np.ones( size_in_bytes, dtype="uint8" ) elif buffer_type == tir_to_cs_translator.BufferType.input: assert address == 0 else: assert buffer_type == tir_to_cs_translator.BufferType.output assert address == 0 def verify(npu_ops): """This wrapper verifies the allocated addresses matches with original tir buffers""" checked_buffers = set() def check_buffer(address, region, length, buffer_var): if buffer_var not in checked_buffers: _check_buffer(address, region, length, buffer_var) checked_buffers.add(buffer_var) for npu_op in npu_ops: if isinstance(npu_op, vapi.NpuDmaOperation): src_tir_buffer_var = npu_op_tir_buffers[npu_op][0].buffer_var check_buffer( npu_op.src.address, npu_op.src.region, npu_op.src.length, src_tir_buffer_var ) dest_tir_load = npu_op_tir_buffers[npu_op][1].buffer_var check_buffer( npu_op.dest.address, npu_op.dest.region, npu_op.dest.length, dest_tir_load, ) elif issubclass(type(npu_op), vapi.NpuBlockOperation): ifm_tir_buffer_var = npu_op_tir_buffers[npu_op][0].buffer_var ifm_length = ( npu_op.ifm.shape.height * npu_op.ifm.shape.width * npu_op.ifm.shape.depth ) check_buffer( npu_op.ifm.tiles.addresses[0], npu_op.ifm.region, ifm_length, ifm_tir_buffer_var, ) ofm_tir_buffer_var = npu_op_tir_buffers[npu_op][1].buffer_var ofm_length = ( npu_op.ofm.shape.height * npu_op.ofm.shape.width * npu_op.ofm.shape.depth ) check_buffer( npu_op.ofm.tiles.addresses[0], npu_op.ofm.region, ofm_length, ofm_tir_buffer_var, ) for idx, weight in enumerate(npu_op_tir_buffers[npu_op][2]): assert isinstance(weight, vapi.NpuAddressRange) check_buffer( npu_op.weights[idx].address, npu_op.weights[idx].region, npu_op.weights[idx].length, weight.address.buffer_var, ) for idx, bias in enumerate(npu_op_tir_buffers[npu_op][3]): assert isinstance(bias, vapi.NpuAddressRange) check_buffer( npu_op.biases[idx].address, npu_op.biases[idx].region, npu_op.biases[idx].length, bias.address.buffer_var, ) for test_case in test_cases: tir_mod = test_case["tir_module"] tir_mod["main"] = tir_mod["main"].with_attr("target", tvm.target.Target("ethos-u")) tir_mod = tvm.tir.transform.MakeUnpackedAPI()(tir_mod) buffer_info = tir_to_cs_translator.extract_buffer_info(tir_mod, test_case["param_dict"]) extern_calls = extract_call_extern_list(tir_mod) _npu_ops = list() for extern_call in extern_calls: _npu_ops.append(tir_to_cs_translator.translate_ethosu_tir_call_extern(extern_call)) npu_op_tir_buffers = collect_tir_buffer_info(_npu_ops) _npu_ops, constant_hex_string, scratch_size = tir_to_cs_translator.assign_addresses( buffer_info, _npu_ops ) scratch_allocation_mask = np.zeros(scratch_size, dtype="uint8") constant_tensor_read_mask = np.zeros(len(constant_hex_string) // 2, dtype="uint8") verify(_npu_ops) # This will be only 1 if all allocated scratch is used. assert np.prod(scratch_allocation_mask) == 1 # This will be only 1 if all constant tensors is read at least once. assert np.prod(constant_tensor_read_mask) == 1 # fmt: off """A ethosu_pooling tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuPooling: @T.prim_func def main(placeholder: T.handle, placeholder_3: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_4 = T.match_buffer(placeholder, [1, 5, 9, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 5, 5, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_pooling", "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_4.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 5, 3, 5, 0, 5, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 15, 3, 1, "AVG", 2, 3, 2, 1, 1, 1, 1, 1, 1, 0, "CLIP", 10, 100, "TFL", "NONE", dtype="int8")) __tvm_meta__ = None # fmt: on def test_translate_ethosu_pooling(): def extract_ethosu_pooling_extern_call(mod): # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] ethosu_pooling_calls = list() def populate_ethosu_pooling_calls(stmt): if ( isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern" and stmt.args[0] == "ethosu_pooling" ): ethosu_pooling_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_ethosu_pooling_calls) return ethosu_pooling_calls[0] pooling_call = extract_ethosu_pooling_extern_call(SingleEthosuPooling) npu_op = tir_to_cs_translator.translate_ethosu_pooling(pooling_call) assert npu_op.ifm.data_type == vapi.NpuDataType.INT8 assert npu_op.ifm.shape == vapi.NpuShape3D(5, 9, 3) assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_0 assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_1 assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).width_0 assert npu_op.ifm.quantization == vapi.NpuQuantization(1.0, 0) assert npu_op.ifm.layout == vapi.NpuLayout.NHWC assert npu_op.ifm.strides == vapi.NpuShape3D(27, 3, 1) # Compare OFM assert npu_op.ofm.data_type == vapi.NpuDataType.INT8 assert npu_op.ofm.shape == vapi.NpuShape3D(5, 5, 3) assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(5, 0, 5, [0, 0, 0, 0]).height_0 assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(5, 0, 5, [0, 0, 0, 0]).height_1 assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(5, 0, 5, [0, 0, 0, 0]).width_0 assert npu_op.ofm.quantization == vapi.NpuQuantization(1.0, 0) assert npu_op.ofm.layout == vapi.NpuLayout.NHWC assert npu_op.ofm.strides == vapi.NpuShape3D(15, 3, 1) # Compare pooling_type assert npu_op.sub_op_type == vapi.NpuPoolingOp.AVERAGE # Compare kernel and padding assert ( npu_op.kernel.__dict__ == vapi.NpuKernel(w=2, h=3, stride_x=2, stride_y=1, dilation_x=1, dilation_y=1).__dict__ ) assert npu_op.padding == vapi.NpuPadding(top=1, left=1, bottom=1, right=0) # Compare activation assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU assert npu_op.activation.min == 10 assert npu_op.activation.max == 100 # Compare rounding mode assert npu_op.rounding_mode == vapi.NpuRoundingMode.TFL # Compare ifm upscaling assert npu_op.ifm_upscale == vapi.NpuResamplingMode.NONE # fmt: off """A ethosu_binary_elementwise ADD tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseAdd: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer( placeholder, [270], dtype="int8", elem_offset=0, align=128, offset_factor=1 ) ethosu_write_2 = T.match_buffer( ethosu_write, [1, 5, 9, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1 ) # body T.evaluate(T.call_extern( "ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 135), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "ADD", 0, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise SUB tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseSub: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [270], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 5, 9, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 135), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "SUB", 0, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise MUL tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseMul: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [270], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 5, 9, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 135), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "MUL", 0, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise MIN tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseMin: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [270], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 5, 9, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 135), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "MIN", 0, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise Max tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseMax: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [270], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 5, 9, 3], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", placeholder_2.data, 135), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int8", 5, 9, 3, 5, 0, 9, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "MAX", 0, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise SHR tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseShr: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [270], dtype="int32", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 5, 9, 3], dtype="int32", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int32", 5, 9, 3, 5, 0, 9, T.load("int32", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int32", 5, 9, 3, 5, 0, 9, T.load("int32", placeholder_2.data, 135), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int32", 5, 9, 3, 5, 0, 9, T.load("int32", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "SHR", 0, "NONE", 0, 0, "TFL", dtype="int32")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise SHL tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseShl: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [270], dtype="int32", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 5, 9, 3], dtype="int32", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int32", 5, 9, 3, 5, 0, 9, T.load("int32", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int32", 5, 9, 3, 5, 0, 9, T.load("int32", placeholder_2.data, 135), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "int32", 5, 9, 3, 5, 0, 9, T.load("int32", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 27, 3, 1, "SHL", 0, "CLIP", 10, 100, "TFL", dtype="int32")) __tvm_meta__ = None # fmt: on @pytest.mark.parametrize("operator_type", ["ADD", "SUB", "MUL", "MIN", "MAX", "SHR", "SHL"]) def test_translate_ethosu_binary_elementwise(operator_type): if operator_type == "SHR" or operator_type == "SHL": data_type = vapi.NpuDataType.INT32 data_type_bytes = 4 else: data_type = vapi.NpuDataType.INT8 data_type_bytes = 1 def extract_ethosu_binary_elementwise_call_extern(mod): # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] ethosu_binary_elementwise_calls = list() def populate_ethosu_binary_elementwise_calls(stmt): if ( isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern" and stmt.args[0] == "ethosu_binary_elementwise" ): ethosu_binary_elementwise_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_ethosu_binary_elementwise_calls) return ethosu_binary_elementwise_calls[0] if operator_type == "ADD": binary_elementwise = SingleEthosuBinaryElementwiseAdd elif operator_type == "SUB": binary_elementwise = SingleEthosuBinaryElementwiseSub elif operator_type == "MUL": binary_elementwise = SingleEthosuBinaryElementwiseMul elif operator_type == "MIN": binary_elementwise = SingleEthosuBinaryElementwiseMin elif operator_type == "MAX": binary_elementwise = SingleEthosuBinaryElementwiseMax elif operator_type == "SHR": binary_elementwise = SingleEthosuBinaryElementwiseShr elif operator_type == "SHL": binary_elementwise = SingleEthosuBinaryElementwiseShl binary_elementwise_call = extract_ethosu_binary_elementwise_call_extern(binary_elementwise) npu_op = tir_to_cs_translator.translate_ethosu_binary_elementwise(binary_elementwise_call) # Compare IFM assert npu_op.ifm.data_type == data_type assert npu_op.ifm.shape == vapi.NpuShape3D(5, 9, 3) assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_0 assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_1 assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).width_0 assert npu_op.ifm.quantization == vapi.NpuQuantization(1.0, 0) assert npu_op.ifm.layout == vapi.NpuLayout.NHWC assert npu_op.ifm.strides == vapi.NpuShape3D( 27 * data_type_bytes, 3 * data_type_bytes, 1 * data_type_bytes ) # Compare IFM2 assert npu_op.ifm2.data_type == data_type assert npu_op.ifm2.shape == vapi.NpuShape3D(5, 9, 3) assert npu_op.ifm2.tiles.height_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_0 assert npu_op.ifm2.tiles.height_1 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_1 assert npu_op.ifm2.tiles.width_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).width_0 assert npu_op.ifm2.quantization == vapi.NpuQuantization(1.0, 0) assert npu_op.ifm2.layout == vapi.NpuLayout.NHWC assert npu_op.ifm2.strides == vapi.NpuShape3D( 27 * data_type_bytes, 3 * data_type_bytes, 1 * data_type_bytes ) # Compare OFM assert npu_op.ofm.data_type == data_type assert npu_op.ofm.shape == vapi.NpuShape3D(5, 9, 3) assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_0 assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).height_1 assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(5, 0, 9, [0, 0, 0, 0]).width_0 assert npu_op.ofm.quantization == vapi.NpuQuantization(1.0, 0) assert npu_op.ofm.layout == vapi.NpuLayout.NHWC assert npu_op.ofm.strides == vapi.NpuShape3D( 27 * data_type_bytes, 3 * data_type_bytes, 1 * data_type_bytes ) # Compare op type if operator_type == "ADD": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.ADD elif operator_type == "SUB": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SUB elif operator_type == "MUL": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MUL elif operator_type == "MIN": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MIN elif operator_type == "MAX": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MAX elif operator_type == "SHR": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SHR elif operator_type == "SHL": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SHL # Compare reversed_operands assert npu_op.reversed_operands == False # Compare activation if operator_type == "SHR": assert npu_op.activation is None else: assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU assert npu_op.activation.min == 10 assert npu_op.activation.max == 100 # Compare rounding mode assert npu_op.rounding_mode == vapi.NpuRoundingMode.TFL # fmt: off """A ethosu_binary_elementwise ADD with broadcasting tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseAddBroadcasting: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 2, 3, 4], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "ADD", 1, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise SUB with broadcasting tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseSubBroadcasting: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 2, 3, 4], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "SUB", 1, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise MUL with broadcasting tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseMulBroadcasting: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 2, 3, 4], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "MUL", 1, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise MIN with broadcasting tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseMinBroadcasting: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 2, 3, 4], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "MIN", 1, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise MAX with broadcasting tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseMaxBroadcasting: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [27], dtype="int8", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 2, 3, 4], dtype="int8", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 2, 3, 4, 2, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int8", 1, 3, 1, 1, 0, 3, T.load("int8", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int8", 2, 3, 4, 2, 0, 3, T.load("int8", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "MAX", 1, "CLIP", 10, 100, "TFL", dtype="int8")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise SHR with broadcasting tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseShrBroadcasting: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [27], dtype="int32", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 2, 3, 4], dtype="int32", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int32", 2, 3, 4, 2, 0, 3, T.load("int32", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int32", 1, 3, 1, 1, 0, 3, T.load("int32", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int32", 2, 3, 4, 2, 0, 3, T.load("int32", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "SHR", 1, "NONE", 0, 0, "TFL", dtype="int32")) __tvm_meta__ = None # fmt: on # fmt: off """A ethosu_binary_elementwise SHL with broadcasting tir testcase for the translator""" @tvm.script.ir_module class SingleEthosuBinaryElementwiseShlBroadcasting: @T.prim_func def main(placeholder: T.handle, ethosu_write: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) placeholder_2 = T.match_buffer(placeholder, [27], dtype="int32", elem_offset=0, align=128, offset_factor=1) ethosu_write_2 = T.match_buffer(ethosu_write, [1, 2, 3, 4], dtype="int32", elem_offset=0, align=128, offset_factor=1) # body T.evaluate(T.call_extern("ethosu_binary_elementwise", "int32", 2, 3, 4, 2, 0, 3, T.load("int32", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "int32", 1, 3, 1, 1, 0, 3, T.load("int32", placeholder_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 1, 1, 1, "int32", 2, 3, 4, 2, 0, 3, T.load("int32", ethosu_write_2.data, 0), 0, 0, 0, T.float32(1.0), 0, "NHWC", 12, 4, 1, "SHL", 1, "CLIP", 10, 100, "TFL", dtype="int32")) __tvm_meta__ = None # fmt: on @pytest.mark.parametrize("operator_type", ["ADD", "SUB", "MUL", "MIN", "MAX", "SHR", "SHL"]) def test_translate_ethosu_binary_elementwise_broadcasting(operator_type): if operator_type == "SHR" or operator_type == "SHL": data_type = vapi.NpuDataType.INT32 data_type_bytes = 4 else: data_type = vapi.NpuDataType.INT8 data_type_bytes = 1 def extract_ethosu_binary_elementwise_broadcasting_call_extern(mod): # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] ethosu_binary_elementwise_calls = list() def populate_ethosu_binary_elementwise_calls(stmt): if ( isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern" and stmt.args[0] == "ethosu_binary_elementwise" ): ethosu_binary_elementwise_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_ethosu_binary_elementwise_calls) return ethosu_binary_elementwise_calls[0] if operator_type == "ADD": binary_elementwise = SingleEthosuBinaryElementwiseAddBroadcasting elif operator_type == "SUB": binary_elementwise = SingleEthosuBinaryElementwiseSubBroadcasting elif operator_type == "MUL": binary_elementwise = SingleEthosuBinaryElementwiseMulBroadcasting elif operator_type == "MIN": binary_elementwise = SingleEthosuBinaryElementwiseMinBroadcasting elif operator_type == "MAX": binary_elementwise = SingleEthosuBinaryElementwiseMaxBroadcasting elif operator_type == "SHR": binary_elementwise = SingleEthosuBinaryElementwiseShrBroadcasting elif operator_type == "SHL": binary_elementwise = SingleEthosuBinaryElementwiseShlBroadcasting binary_elementwise_call = extract_ethosu_binary_elementwise_broadcasting_call_extern( binary_elementwise ) npu_op = tir_to_cs_translator.translate_ethosu_binary_elementwise(binary_elementwise_call) # Compare IFM assert npu_op.ifm.data_type == data_type assert npu_op.ifm.shape == vapi.NpuShape3D(2, 3, 4) assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).height_0 assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).height_1 assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).width_0 assert npu_op.ifm.quantization == vapi.NpuQuantization(1.0, 0) assert npu_op.ifm.layout == vapi.NpuLayout.NHWC assert npu_op.ifm.strides == vapi.NpuShape3D( 12 * data_type_bytes, 4 * data_type_bytes, 1 * data_type_bytes ) # Compare IFM2 assert npu_op.ifm2.data_type == data_type assert npu_op.ifm2.shape == vapi.NpuShape3D(1, 3, 1) assert npu_op.ifm2.tiles.height_0 == vapi.NpuTileBox(1, 0, 3, [0, 0, 0, 0]).height_0 assert npu_op.ifm2.tiles.height_1 == vapi.NpuTileBox(1, 0, 3, [0, 0, 0, 0]).height_1 assert npu_op.ifm2.tiles.width_0 == vapi.NpuTileBox(1, 0, 3, [0, 0, 0, 0]).width_0 assert npu_op.ifm2.quantization == vapi.NpuQuantization(1.0, 0) assert npu_op.ifm2.layout == vapi.NpuLayout.NHWC assert npu_op.ifm2.strides == vapi.NpuShape3D( 1 * data_type_bytes, 1 * data_type_bytes, 1 * data_type_bytes ) # Compare OFM assert npu_op.ofm.data_type == data_type assert npu_op.ofm.shape == vapi.NpuShape3D(2, 3, 4) assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).height_0 assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).height_1 assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(2, 0, 3, [0, 0, 0, 0]).width_0 assert npu_op.ofm.quantization == vapi.NpuQuantization(1.0, 0) assert npu_op.ofm.layout == vapi.NpuLayout.NHWC assert npu_op.ofm.strides == vapi.NpuShape3D( 12 * data_type_bytes, 4 * data_type_bytes, 1 * data_type_bytes ) # Compare op type if operator_type == "ADD": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.ADD elif operator_type == "SUB": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SUB elif operator_type == "MUL": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MUL elif operator_type == "MIN": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MIN elif operator_type == "MAX": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.MAX elif operator_type == "SHR": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SHR elif operator_type == "SHL": assert npu_op.sub_op_type == vapi.NpuElementWiseOp.SHL # Compare reversed_operands assert npu_op.reversed_operands == True # Compare activation if operator_type == "SHR": assert npu_op.activation is None else: assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU assert npu_op.activation.min == 10 assert npu_op.activation.max == 100 # Compare rounding mode assert npu_op.rounding_mode == vapi.NpuRoundingMode.TFL if __name__ == "__main__": pytest.main([__file__])