# Triton model configuration for the "bert_base" model.
# Declares three INT64 inputs, one FP32 output, and a single GPU instance.

name: "bert_base"
platform: "onnxruntime_onnx"

# NOTE(review): ONNX models are conventionally stored as "model.onnx";
# confirm the model file in the version directory is really "model.bin".
default_model_filename: "model.bin"

# Zero means no implicit batch dimension is added by the server, so the
# dims below spell out every dimension of each tensor.
max_batch_size: 0

# Input tensors. All three share the same type and the same two
# variable-size (-1) dimensions.
# Presumably (batch, sequence_length) - TODO confirm against the model.
input [
  {
    name: "input_ids"
    data_type: TYPE_INT64
    dims: [-1, -1]
  },
  {
    name: "token_type_ids"
    data_type: TYPE_INT64
    dims: [-1, -1]
  },
  {
    name: "attention_mask"
    data_type: TYPE_INT64
    dims: [-1, -1]
  }
]

# Output tensor: two variable-size dimensions followed by a fixed one.
# NOTE(review): 30522 looks like a vocabulary size - confirm.
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [-1, -1, 30522]
  }
]

# One execution instance of the model, placed on GPU.
instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]