#!/bin/bash

# This file contains all customizable configuration items for the project

######################################################################
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
# SPDX-License-Identifier: MIT-0                                     #
######################################################################

# Model settings
huggingface_model_name=bert-base-multilingual-cased
huggingface_tokenizer_class=BertTokenizer
huggingface_model_class=BertForQuestionAnswering

# Compiler settings
# processor = cpu|gpu|inf1|inf2|arm
processor=cpu
pipeline_cores=1
sequence_length=128
batch_size=1
test=True

# account is the current AWS user account. This setting is determined automatically.
account=$(aws sts get-caller-identity --query Account --output text)

# region is used to log in when registry_type is ecr
region=us-east-1

# Container settings
# Default is the private ECR registry in the current AWS account.
# If you set registry explicitly, include the registry URI up to the image name
# and end the value with /
registry=${account}.dkr.ecr.${region}.amazonaws.com/
# registry_type = ecr
registry_type=ecr
base_image_name=aws-do-inference-base
base_image_tag=:v9-${processor}
model_image_name=${huggingface_model_name}
model_image_tag=:v9-${processor}

# Trace settings
# trace_opts_$processor is a processor-specific setting used by the docker run command in the trace.sh script
# The matching value is selected automatically based on your processor setting above
trace_opts_cpu=""
trace_opts_gpu="--gpus 0"
trace_opts_inf1="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
trace_opts_inf2="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
trace_opts_arm=""
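
# A minimal sketch of how trace.sh may select the processor-specific options
# above, using bash indirect expansion. This is an assumption for illustration
# (the actual trace.sh may differ, and the config filename is assumed), so it
# is kept commented out here:
#
#   source ./config.properties
#   opts_var="trace_opts_${processor}"   # e.g. trace_opts_gpu when processor=gpu
#   trace_opts="${!opts_var}"            # indirect expansion resolves the value
#   docker run ${trace_opts} ${registry}${base_image_name}${base_image_tag}
#
# With processor=gpu this resolves trace_opts to "--gpus 0".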
# Deployment settings
# Some of these settings apply only when the runtime is kubernetes
# runtime = docker | kubernetes
runtime=kubernetes
# number of models per model server
num_models=15
# quiet = False | True - sets whether the model server should print logs
quiet=False
# postprocess = True | False - sets whether tensors returned from the model should be translated back to text or returned as-is
postprocess=True
# service_port - port on which the model service will be exposed
service_port=8080

# Kubernetes-specific deployment settings
# instance_type = c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge | inf1.6xlarge | ...
# A node group with the specified instance_type must exist in the cluster
# The instance type must have the processor configured above
# Example: processor=arm, instance_type=c7g.4xlarge
instance_type=c5.4xlarge
# num_servers - number of model servers to deploy
# Note that more than one model server can run on a node with multiple cpu/gpu/inferentia chips.
# Example: 4 model servers fit on one inf1.6xlarge instance as it has 4 Inferentia chips.
num_servers=1

# Kubernetes namespace
namespace=mpi

# Kubernetes app name
app_name=${huggingface_model_name}-${processor}
app_dir=app-${app_name}-${instance_type}

# Test settings
test_image_name=test-${huggingface_model_name}
test_image_tag=:v9-cpu
# request_frequency - time in seconds to sleep between two consecutive requests in curl tests
request_frequency=0.01
# Stop the random request test after num_requests requests
num_requests=30
# Number of test containers to launch (default=1), use > 1 for scale testing
num_test_containers=1
# test_instance_type - when runtime is kubernetes, instance type on which test pods will run
test_instance_type=c5.4xlarge
# test_namespace - when runtime is kubernetes, namespace where test pods will be created
test_namespace=mpi
# test_dir - when runtime is kubernetes, directory where test pod manifests are stored
test_dir=app-${test_image_name}-${instance_type}
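
# A hedged usage sketch showing how a build/push script might compose the full
# model image URI and log in to ECR from the settings above. The composition
# and script steps are illustrative assumptions, not taken from this file;
# aws ecr get-login-password and docker login are standard CLI calls. Kept
# commented out so sourcing this config has no side effects:
#
#   source ./config.properties                        # assumed filename
#   model_image="${registry}${model_image_name}${model_image_tag}"
#   # e.g. <account>.dkr.ecr.us-east-1.amazonaws.com/bert-base-multilingual-cased:v9-cpu
#   if [ "${registry_type}" == "ecr" ]; then
#     aws ecr get-login-password --region ${region} \
#       | docker login --username AWS --password-stdin ${registry%/}
#   fi
#   docker push ${model_image}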