# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import tvm
from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_executor
import tvm.topi.testing


# "unquantize" a quantized tensor
def recover(data, scale, zp):
    return scale * (np.asarray(data) - zp)


def generate_golden_output(x_recovered, y_recovered, scale, zp):
    mul = x_recovered * y_recovered
    output = np.around(mul / scale + zp)

    q_min = np.iinfo(np.uint8).min
    q_max = np.iinfo(np.uint8).max

    return np.clip(output, q_min, q_max)


def test_tflite_same_io_qnn_params():
    data_dtype = "uint8"

    lhs_scale = rhs_scale = output_scale = 0.00784314
    lhs_zero_point = rhs_zero_point = output_zero_point = 127

    x = relay.var("x", shape=(1, 4), dtype=data_dtype)
    y = relay.var("y", shape=(1, 4), dtype=data_dtype)
    z = relay.qnn.op.mul(
        lhs=x,
        rhs=y,
        lhs_scale=relay.const(lhs_scale, "float32"),
        lhs_zero_point=relay.const(lhs_zero_point, "int32"),
        rhs_scale=relay.const(rhs_scale, "float32"),
        rhs_zero_point=relay.const(rhs_zero_point, "int32"),
        output_scale=relay.const(output_scale, "float32"),
        output_zero_point=relay.const(output_zero_point, "int32"),
    )

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_datas = [
        np.array((1, 153, 2, 178)).reshape((1, 4)),
        np.array((25, 1, 178, 216)).reshape((1, 4)),
        np.array((25, 153, 1, 165)).reshape((1, 4)),
    ]
    y_datas = [
        np.array((204, 178, 1, 8)).reshape((1, 4)),
        np.array((204, 178, 191, 1)).reshape((1, 4)),
        np.array((204, 178, 1, 191)).reshape((1, 4)),
    ]

    for i in range(0, 3):
        x_data = x_datas[i]
        y_data = y_datas[i]

        x_rec = recover(x_data, lhs_scale, lhs_zero_point)
        y_rec = recover(y_data, rhs_scale, rhs_zero_point)
        golden = generate_golden_output(x_rec, y_rec, output_scale, output_zero_point)

        op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)(
            x_data, y_data
        )
        np.testing.assert_equal(op_res.numpy(), np.uint8(golden))


def test_tflite_different_io_qnn_params():
    data_dtype = "uint8"

    lhs_scale = 0.0156863
    lhs_zero_point = 127
    rhs_scale = 0.0117647
    rhs_zero_point = 85
    output_scale = 0.0235294
    output_zero_point = 128

    x = relay.var("x", shape=(1, 4), dtype=data_dtype)
    y = relay.var("y", shape=(1, 4), dtype=data_dtype)
    z = relay.qnn.op.mul(
        lhs=x,
        rhs=y,
        lhs_scale=relay.const(lhs_scale, "float32"),
        lhs_zero_point=relay.const(lhs_zero_point, "int32"),
        rhs_scale=relay.const(rhs_scale, "float32"),
        rhs_zero_point=relay.const(rhs_zero_point, "int32"),
        output_scale=relay.const(output_scale, "float32"),
        output_zero_point=relay.const(output_zero_point, "int32"),
    )

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]
mod["main"] x_datas = [ np.array((76, 140, 153, 172)).reshape((1, 4)), np.array((133, 140, 146, 153)).reshape((1, 4)), np.array((76, 140, 172, 146)).reshape((1, 4)), ] y_datas = [ np.array((136, 119, 128, 17)).reshape((1, 4)), np.array((136, 119, 111, 94)).reshape((1, 4)), np.array((136, 119, 17, 128)).reshape((1, 4)), ] for i in range(0, 3): x_data = x_datas[i] y_data = y_datas[i] x_rec = recover(x_data, lhs_scale, lhs_zero_point) y_rec = recover(y_data, rhs_scale, rhs_zero_point) golden = generate_golden_output(x_rec, y_rec, output_scale, output_zero_point) op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)( x_data, y_data ) np.testing.assert_equal(op_res.numpy(), np.uint8(golden)) def test_saturation(): # Same params data_dtype = "uint8" lhs_scale = rhs_scale = output_scale = 0.125 lhs_zero_point = rhs_zero_point = output_zero_point = 0 x = relay.var("x", shape=(1, 4), dtype=data_dtype) y = relay.var("y", shape=(1, 4), dtype=data_dtype) z = relay.qnn.op.mul( lhs=x, rhs=y, lhs_scale=relay.const(lhs_scale, "float32"), lhs_zero_point=relay.const(lhs_zero_point, "int32"), rhs_scale=relay.const(rhs_scale, "float32"), rhs_zero_point=relay.const(rhs_zero_point, "int32"), output_scale=relay.const(output_scale, "float32"), output_zero_point=relay.const(output_zero_point, "int32"), ) func = relay.Function([x, y], z) mod = tvm.IRModule.from_expr(func) mod = relay.transform.InferType()(mod) mod = relay.qnn.transform.CanonicalizeOps()(mod) func = mod["main"] x_data = np.array((255, 1, 1, 0)).reshape((1, 4)) y_data = np.array((255, 255, 128, 0)).reshape((1, 4)) x_rec = recover(x_data, lhs_scale, lhs_zero_point) y_rec = recover(y_data, rhs_scale, rhs_zero_point) golden = generate_golden_output(x_rec, y_rec, output_scale, output_zero_point) op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)( x_data, y_data ) np.testing.assert_equal(op_res.numpy(), np.uint8(golden)) # Same params, different scale lhs_scale = rhs_scale = 0.125 output_scale = 0.25 z = relay.qnn.op.mul( lhs=x, rhs=y, lhs_scale=relay.const(lhs_scale, "float32"), lhs_zero_point=relay.const(lhs_zero_point, "int32"), rhs_scale=relay.const(rhs_scale, "float32"), rhs_zero_point=relay.const(rhs_zero_point, "int32"), output_scale=relay.const(output_scale, "float32"), output_zero_point=relay.const(output_zero_point, "int32"), ) func = relay.Function([x, y], z) mod = tvm.IRModule.from_expr(func) mod = relay.transform.InferType()(mod) mod = relay.qnn.transform.CanonicalizeOps()(mod) func = mod["main"] x_data = np.array((255, 1, 1, 0)).reshape((1, 4)) y_data = np.array((255, 255, 127, 0)).reshape((1, 4)) x_rec = recover(x_data, lhs_scale, lhs_zero_point) y_rec = recover(y_data, rhs_scale, rhs_zero_point) golden = generate_golden_output(x_rec, y_rec, output_scale, output_zero_point) op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)( x_data, y_data ) np.testing.assert_equal(op_res.numpy(), np.uint8(golden)) # All params different lhs_scale = 0.5 rhs_scale = 0.25 output_scale = 0.125 z = relay.qnn.op.mul( lhs=x, rhs=y, lhs_scale=relay.const(lhs_scale, "float32"), lhs_zero_point=relay.const(lhs_zero_point, "int32"), rhs_scale=relay.const(rhs_scale, "float32"), rhs_zero_point=relay.const(rhs_zero_point, "int32"), output_scale=relay.const(output_scale, "float32"), output_zero_point=relay.const(output_zero_point, "int32"), ) func = relay.Function([x, y], z) mod = tvm.IRModule.from_expr(func) mod = relay.transform.InferType()(mod) mod = 
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 0, 1, 0)).reshape((1, 4))
    y_data = np.array((0, 128, 64, 0)).reshape((1, 4))

    x_rec = recover(x_data, lhs_scale, lhs_zero_point)
    y_rec = recover(y_data, rhs_scale, rhs_zero_point)
    golden = generate_golden_output(x_rec, y_rec, output_scale, output_zero_point)

    op_res = relay.create_executor("graph", device=tvm.cpu(0), target="llvm").evaluate(func)(
        x_data, y_data
    )
    np.testing.assert_equal(op_res.numpy(), np.uint8(golden))


if __name__ == "__main__":
    test_tflite_same_io_qnn_params()
    test_tflite_different_io_qnn_params()
    test_saturation()