/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ /*! * \file src/relay/op/contrib/ethosu/depthwise.cc * \brief Depthwise convolution 2D operator definition for the Arm(R) Ethos(TM)-U NPU */ #include #include #include #include #include #include "../../../qnn/utils.h" #include "../../nn/convolution.h" #include "common.h" namespace tvm { namespace relay { namespace op { namespace contrib { namespace ethosu { /*! \brief Attributes used by the Ethos(TM)-U NPU depthwise operator */ struct EthosuDepthwiseConv2DAttrs : public tvm::AttrsNode { double ifm_scale; int ifm_zero_point; int weight_zero_point; double ofm_scale; int ofm_zero_point; Array kernel_shape; IndexExpr ofm_channels; Array strides; Array padding; Array dilation; String activation; int clip_min; int clip_max; String rounding_mode; String upscale; String ifm_layout; String ofm_layout; String ofm_dtype; TVM_DECLARE_ATTRS(EthosuDepthwiseConv2DAttrs, "relay.attrs.EthosuDepthwiseConv2DAttrs") { TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); TVM_ATTR_FIELD(ifm_zero_point) .describe("The quantization zero point for the Output Feature Map tensor."); TVM_ATTR_FIELD(weight_zero_point) .describe("The quantization zero point for the weight tensor."); TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); TVM_ATTR_FIELD(ofm_zero_point) .describe("The quantization zero point for the Output Feature Map tensor."); TVM_ATTR_FIELD(kernel_shape) .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).") .set_default(NullValue >()); TVM_ATTR_FIELD(ofm_channels) .describe("The number of OFM channels.") .set_default(NullValue()); TVM_ATTR_FIELD(strides) .describe("The 2 dimensional strides as (stride_height, stride_width).") .set_default(Array({1, 1})); TVM_ATTR_FIELD(padding) .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right)") .set_default(Array({0, 0, 0, 0})); TVM_ATTR_FIELD(dilation) .describe("The 2 dimensional dilation as (dilation_height, dilation_width).") .set_default(Array({1, 1})); TVM_ATTR_FIELD(activation) .describe( "Description: The activation function to use." "'NONE' - no activation function." "'CLIP' - clip the output between clip_min and clip_max." "'TANH - tanh activation function." "'SIGMOID' - sigmoid activation function." "'LUT' - use a look-up table to perform the activation function.") .set_default("NONE"); TVM_ATTR_FIELD(clip_min) .describe("The minimum clipping value if activation = CLIP.") .set_default(0); TVM_ATTR_FIELD(clip_max) .describe("The maximum clipping value if activation = CLIP.") .set_default(0); TVM_ATTR_FIELD(rounding_mode) .describe( "The rounding mode to apply to the Output Feature Map tensor. " "'TFL' - Tensorflow Lite rounding scheme. " "'TRUNCATE' - Truncate towards zero." "'NATURAL' - Round to nearest value, with x.5 rounded up towards +infinity.") .set_default("TFL"); TVM_ATTR_FIELD(upscale) .describe( "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " "'NONE' - no upscaling. " "'NEAREST' - upscale using nearest neighbour. " "'ZEROS' - upscale using zeros.") .set_default("NONE"); TVM_ATTR_FIELD(ifm_layout) .set_default("NHWC") .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); TVM_ATTR_FIELD(ofm_layout) .set_default("NHWC") .describe("The layout of the Output Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); TVM_ATTR_FIELD(ofm_dtype) .describe("The Output Feature Map tensor data type. Can be 'int8', 'uint8' or 'int16'.") .set_default("int8"); } }; TVM_REGISTER_NODE_TYPE(EthosuDepthwiseConv2DAttrs); bool EthosuDepthwiseConv2DRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { ICHECK_EQ(types.size(), 5); const auto* ifm = types[0].as(); const auto* weight = types[1].as(); const auto* scale_bias = types[2].as(); if (ifm == nullptr || weight == nullptr) return false; const auto* param = attrs.as(); ICHECK(param != nullptr) << "EthosuDepthwiseConv2DAttrs cannot be nullptr."; DataType ofm_dtype; if (param->ofm_dtype == "int8") { ofm_dtype = DataType::Int(8); } else if (param->ofm_dtype == "uint8") { ofm_dtype = DataType::UInt(8); } else if (param->ofm_dtype == "int16") { ofm_dtype = DataType::Int(16); } else if (param->ofm_dtype == "int32") { ofm_dtype = DataType::Int(32); } if (ifm->dtype != DataType::UInt(8) && ifm->dtype != DataType::Int(8)) { reporter->GetDiagCtx().EmitFatal( Diagnostic::Error(reporter->GetSpan()) << "Invalid operator: expected ethosu_depthwise_conv2d input data type " << "of type(uint8) or type(int8) but was " << ifm->dtype); return false; } if (weight->dtype != DataType::UInt(8) && weight->dtype != DataType::Int(8)) { reporter->GetDiagCtx().EmitFatal( Diagnostic::Error(reporter->GetSpan()) << "Invalid operator: expected ethosu_depthwise_conv2d weight data type " << "of type(uint8) or type(int8) but was " << weight->dtype); return false; } if (scale_bias->dtype != DataType::UInt(8)) { reporter->GetDiagCtx().EmitFatal( Diagnostic::Error(reporter->GetSpan()) << "Invalid operator: expected ethosu_depthwise_conv2d scale bias data type " << "of type(uint8) but was " << scale_bias->dtype); return false; } if (ofm_dtype != DataType::UInt(8) && ofm_dtype != DataType::Int(8) && ofm_dtype != DataType::Int(16) && ofm_dtype != DataType::Int(32)) { reporter->GetDiagCtx().EmitFatal( Diagnostic::Error(reporter->GetSpan()) << "Invalid operator: expected ethosu_depthwise_conv2d output data type " << " type(uint8), type(int8), type(int16) or type(int32) for ofm but was " << ofm_dtype); return false; } // Collect the ifm, weight and ofm tensors for using in the inference function Array tensor_types = {types[0], types[1], types[4]}; // Assign weight type {ofm_channels, kernel_height, kernel_width, 1} reporter->Assign(types[1], TensorType({param->ofm_channels, param->kernel_shape[0], param->kernel_shape[1], weight->shape[3]}, weight->dtype)); // Assign ofm type auto ofm_shape = EthosuInferKernelOutput(ifm->shape, param->ifm_layout, param->ofm_layout, param->kernel_shape, param->ofm_channels, param->dilation, param->strides, param->padding); reporter->Assign(types[4], TensorType(ofm_shape, ofm_dtype)); return true; } Expr MakeEthosuDepthwiseConv2D(Expr ifm, Expr weight, Expr scale_bias, Expr lut, double ifm_scale, int ifm_zero_point, int weight_zero_point, double ofm_scale, int ofm_zero_point, Array kernel_shape, IndexExpr ofm_channels, Array strides, Array padding, Array dilation, String activation, int clip_min, int clip_max, String rounding_mode, String upscale, String ifm_layout, String ofm_layout, String ofm_dtype) { auto attrs = make_object(); attrs->ifm_scale = ifm_scale; attrs->ifm_zero_point = ifm_zero_point; attrs->weight_zero_point = weight_zero_point; attrs->ofm_scale = ofm_scale; attrs->ofm_zero_point = ofm_zero_point; attrs->kernel_shape = std::move(kernel_shape); attrs->ofm_channels = std::move(ofm_channels); attrs->strides = std::move(strides); attrs->padding = std::move(padding); attrs->dilation = std::move(dilation); attrs->activation = std::move(activation); attrs->clip_min = clip_min; attrs->clip_max = clip_max; attrs->rounding_mode = std::move(rounding_mode); attrs->upscale = std::move(upscale); attrs->ifm_layout = std::move(ifm_layout); attrs->ofm_layout = std::move(ofm_layout); attrs->ofm_dtype = std::move(ofm_dtype); static const Op& op = Op::Get("contrib.ethosu.depthwise_conv2d"); return Call(op, {ifm, weight, scale_bias, lut}, Attrs(attrs), {}); } TVM_REGISTER_GLOBAL("relay.op._make.ethosu_depthwise_conv2d") .set_body_typed(MakeEthosuDepthwiseConv2D); RELAY_REGISTER_OP("contrib.ethosu.depthwise_conv2d") .describe(R"code(Arm(R) Ethos(TM)-U NPU 2D quantized depthwise operator. This Relay operator corresponds to the hardware-implemented quantized depthwise operation found on Ethos(TM)-U NPU. It accepts either NHWC or NHCWB16 format for the input data (input feature map, or IFM) and OHWI format for the kernel weights. - **ifm**: NHWC - (1, ifm_height, ifm_width, ifm_channels) NHCWB16 - (1, ifm_height, ifm_channels // 16, ifm_width, 16) - **weight**: (ofm_channels, kernel_shape[0], kernel_shape[1], 1 (depth multiplier)) - **scale_bias**: (ofm_channels, 10) - **ofm**: (1, ofm_height, ofm_width, ofm_channels) )code" TVM_ADD_FILELINE) .set_attrs_type() .set_num_inputs(4) .add_argument("ifm", "Tensor", "The Input Feature Map tensor (IFM).") .add_argument("weight", "Tensor", "The weight tensor.") .add_argument("scale_bias", "Tensor", "The packed per-channel weight scale and bias tensor.") .add_argument("lut", "Tensor", "The look-up table of values to use if activation = 'LUT'") .set_support_level(11) .add_type_rel("EthosuDepthwiseConv2D", EthosuDepthwiseConv2DRel); } // namespace ethosu } // namespace contrib } // namespace op } // namespace relay } // namespace tvm