/* * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file runtime/contrib/tensorrt/tensorrt_ops.h
 * \brief Converters from Relay ops into TensorRT layers. Converters should
 * inherit from TensorRTOpConverter and implement the Convert() method.
 */

#ifndef TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_OPS_H_
#define TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_OPS_H_

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "../json/json_node.h"
#include "NvInfer.h"
#include "tensorrt_utils.h"

#if TRT_VERSION_GE(6, 0, 1)
#define TRT_HAS_IMPLICIT_BATCH(params) (params->network->hasImplicitBatchDimension())
#else
#define TRT_HAS_IMPLICIT_BATCH(params) (true)
#endif

namespace tvm {
namespace runtime {
namespace contrib {

using JSONGraphNode = tvm::runtime::json::JSONGraphNode;

/*!
 * \brief An input to a op may be either kTensor in the case of nvinfer::ITensor*
 * or kWeight for nvinfer1::Weights.
 */
enum TensorRTInputType {
  kTensor,
  kWeight,
};

/*!
 * \brief An input to a TensorRTOpConverter. The type of the input is either kTensor
 * or kWeight. For kTensor, "tensor" contains the input tensor. For kWeight,
 * "weight" contains the input weight and "weight_shape" contains the shape.
 */
struct TensorRTOpInput {
  /*! \brief If type is kTensor, will store input tensor. */
  nvinfer1::ITensor* tensor;

  /*! \brief If type is kWeight, will store input weight. */
  nvinfer1::Weights weight;

  /*! \brief Whether the input is in tensor or weight. */
  TensorRTInputType type;

  /*! \brief If type is kWeight, will store weight shape. */
  std::vector<int> weight_shape;

  explicit TensorRTOpInput(nvinfer1::ITensor* tensor)
      : tensor(tensor), weight({nvinfer1::DataType::kFLOAT, nullptr, 0}), type(kTensor) {}
  TensorRTOpInput(nvinfer1::Weights weight, const std::vector<int>& shape)
      : tensor(nullptr), weight(weight), type(kWeight), weight_shape(shape) {}
};

/*! \brief Parameters to convert an Op from Relay to TensorRT. */
struct TensorRTOpConverterParams {
  /*! \brief The TRT network that the new layer should be added to. */
  nvinfer1::INetworkDefinition* network;
  /*! \brief The corresponding serialized node. */
  const JSONGraphNode& node;
  /*! \brief The type of op. */
  std::string op_name;
  /*! \brief Inputs to the op. */
  std::vector<TensorRTOpInput> inputs;
  /*! \brief Outputs of the op should be populated here during Convert(). */
  std::vector<nvinfer1::ITensor*> outputs;
  /*! \brief Any newly allocated weights should be stored here also. */
  std::vector<nvinfer1::Weights>* trt_weights;

  TensorRTOpConverterParams(nvinfer1::INetworkDefinition* network, const JSONGraphNode& node,
                            std::vector<nvinfer1::Weights>* trt_weights)
      : network(network), node(node), trt_weights(trt_weights) {
    op_name = node.GetOpName();
  }
};

/*! \brief Base class for an op converter from Relay to TRT. */
class TensorRTOpConverter {
 public:
  /*! \brief Used to specify whether each input is tensor or weight. */
  const std::vector<TensorRTInputType> input_types;
  /*! \brief If set to true, any number of tensor inputs can be used for the op.
   */
  const bool variable_input_count;

  /*!
   * \brief Converter subclasses should call this constructor to set
   * input_types or variable_input_count.
   * \param input_types For each input to the op, there should be a
   * corresponding entry in input_types to determine whether that input should
   * be a tensor or a weight. TensorRTBuilder will prepare inputs in
   * TensorRTOpConverter according to this.
   * \param variable_input_count If the op can have multiple inputs, set this to
   * true. input_types vector will be ignored and any number of input tensors
   * can be used for this op. All inputs will be tensors and not weights.
   */
  explicit TensorRTOpConverter(const std::vector<TensorRTInputType>& input_types,
                               bool variable_input_count = false);

  /*!
   * \brief Convert to TRT. Implementation should use inputs and attributes
   * from the CallNode to add the corresponding TRT layers to network. Outputs
   * should be pushed to outputs vector.
   * \param params Parameters for this op.
   */
  virtual void Convert(TensorRTOpConverterParams* params) const = 0;

  /*!
   * \brief Helper function to reshape a tensor.
   * \param params Parameters for this op.
   * \param input Tensor to reshape.
   * \param new_shape New shape, does not include batch dim.
   * \return Reshaped tensor
   */
  nvinfer1::ITensor* Reshape(TensorRTOpConverterParams* params, nvinfer1::ITensor* input,
                             const std::vector<int>& new_shape) const;

  /*!
   * \brief Helper function to transpose a tensor.
   * \param params Parameters for this op.
   * \param input Tensor to transpose.
   * \param order New order of axes, does include batch dim.
   * \return Transposed tensor
   */
  nvinfer1::ITensor* Transpose(TensorRTOpConverterParams* params, nvinfer1::ITensor* input,
                               const std::vector<int>& order) const;

  /*!
   * \brief Helper function to convert an axis to TRT format.
   * \param axis Axis from TVM.
   * \param input_rank Rank of input, does not include batch dim.
   * \return Axis in TRT format.
   */
  int ConvertAxis(TensorRTOpConverterParams* params, int axis, int input_rank) const;

  /*!
   * \brief Create constant that is broadcastable.
   * \param params Parameters for this op.
   * \param value Value of scalar.
   * \param broadcast_to_dims Dims that scalar should be broadcastable against.
   * \return Constant tensor.
   */
  nvinfer1::ITensor* CreateScalar(TensorRTOpConverterParams* params, float value,
                                  const nvinfer1::Dims& broadcast_to_dims) const;

  /*!
   * \brief Get pre/post padding values from padding attributes array.
   * \param padding Serialized padding from op attributes.
   * \param padding_is_asymmetric True if both pre and post are needed for asymmetric padding.
   * \param prepadding Prepadding value or symmetric padding values if !padding_is_asymmetric.
   * \param postpadding Postpadding value if padding_is_asymmetric.
   */
  void GetPadding(const std::vector<std::string>& padding, bool* use_asymmetric_padding,
                  nvinfer1::DimsHW* prepadding, nvinfer1::DimsHW* postpadding) const;

  /*!
   * \brief Get pre/post padding values from padding attributes array for volumetric ops.
   * \param padding Serialized padding from op attributes.
   * \param padding_is_asymmetric True if both pre and post are needed for asymmetric padding.
   * \param prepadding Prepadding value or symmetric padding values if !padding_is_asymmetric.
   * \param postpadding Postpadding value if padding_is_asymmetric.
   */
  void GetPadding3D(const std::vector<std::string>& padding, bool* use_asymmetric_padding,
                    nvinfer1::Dims* prepadding, nvinfer1::Dims* postpadding) const;
};

/*!
 * \brief Get the map of available TensorRTOpConverters, where the key is the name of the relay op.
 * \return Map of TensorRTOpConverters.
 */
const std::shared_ptr<std::unordered_map<std::string, std::shared_ptr<TensorRTOpConverter>>>
GetOpConverters();

}  // namespace contrib
}  // namespace runtime
}  // namespace tvm

#endif  // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_OPS_H_