# Triton Inference Server model configuration (protobuf text format).
# Declares the "bert" model, its backend platform, tensor signature,
# instance placement, and dynamic-batching settings.

name: "bert"
platform: "tensorrt_plan"

# Largest batch size allowed for this model. Per Triton's model-configuration
# documentation, when max_batch_size > 0 the `dims` entries below describe a
# single request and do not include the batch dimension.
max_batch_size: 128

# Input tensors: two INT32 tensors, each with per-request shape [128].
# NOTE(review): 128 is presumably the fixed sequence length -- confirm against
# the exported engine.
input [
  {
    name: "token_ids"
    data_type: TYPE_INT32
    dims: [ 128 ]
  },
  {
    name: "attn_mask"
    data_type: TYPE_INT32
    dims: [ 128 ]
  }
]

# Output tensors: two FP32 tensors with per-request shapes [128, 768] and [768].
# NOTE(review): "1634" looks like an auto-generated tensor name carried over
# from model export; the name must match the tensor name in the model, so do
# not rename it here without re-exporting -- confirm what it represents.
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 128, 768 ]
  },
  {
    name: "1634"
    data_type: TYPE_FP32
    dims: [ 768 ]
  }
]

# One GPU execution instance group. No `gpus` list is given, so placement
# across devices follows Triton's default for KIND_GPU.
instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]

# Dynamic batching is enabled with a single preferred batch size of 16.
# No max_queue_delay_microseconds is set, so Triton's default delay applies.
dynamic_batching {
  preferred_batch_size: [ 16 ]
}