# Triton Inference Server model configuration (protobuf text format).
# NOTE(review): when `name` is set, Triton expects it to match the model's
# directory name in the model repository - confirm the directory is "bert-trt".
name: "bert-trt"
# Selects a serialized TensorRT engine (plan) as the model format.
platform: "tensorrt_plan"
# Upper bound on the batch size Triton will send to the model. Because this is
# greater than 0, the batch dimension is implicit and is NOT listed in the
# `dims` of the input/output tensors.
max_batch_size: 16
# Input tensors accepted by the model. `dims` is the per-request shape; the
# batch dimension is not listed here (it is implied by max_batch_size > 0).
input [
  {
    # presumably the tokenized input IDs, fixed length 128 - TODO confirm
    name: "token_ids"
    data_type: TYPE_INT32
    dims: [ 128 ]
  },
  {
    # presumably the attention mask paired with token_ids (same length) -
    # TODO confirm against the exported engine's bindings
    name: "attn_mask"
    data_type: TYPE_INT32
    dims: [ 128 ]
  }
]
# Output tensors produced by the model. `dims` is the per-request shape; the
# batch dimension is not listed here (it is implied by max_batch_size > 0).
output [
  {
    # 128 x 384 FP32 values per request; presumably one 384-wide vector per
    # input position - TODO confirm
    name: "output"
    data_type: TYPE_FP32
    dims: [ 128, 384 ]
  },
  {
    # 384 FP32 values per request; presumably a pooled sentence-level
    # embedding - TODO confirm
    name: "pooled_embeds"
    data_type: TYPE_FP32
    dims: [ 384 ]
  }
]
# Execution instances for this model. Only `kind` is set, so instances run on
# GPU; `count` and `gpus` are not specified and fall back to Triton's defaults.
# NOTE(review): per the Triton docs the default is one instance on each
# available GPU - set `count` / `gpus` explicitly if that is not intended.
instance_group [
  {
    kind: KIND_GPU
  }
]