#!/bin/bash

# This file contains all customizable configuration items for the project

######################################################################
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
# SPDX-License-Identifier: MIT-0                                     #
######################################################################

# Model settings
huggingface_model_name=bert-base-multilingual-cased
huggingface_tokenizer_class=BertTokenizer
huggingface_model_class=BertForQuestionAnswering

# Compiler settings
# processor = cpu|gpu|inf1|inf2|arm
processor=cpu
pipeline_cores=1
sequence_length=128
batch_size=1
test=True

# account is the current AWS user account. This setting is determined automatically.
account=$(aws sts get-caller-identity --query Account --output text)

# region is used to log in when registry_type is ecr
region=us-east-1

# Container settings
# Default is the private ECR registry in the current AWS account.
# If you set registry explicitly, include the registry URI up to the image name
# and end the value with /
registry=${account}.dkr.ecr.${region}.amazonaws.com/
# registry_type = ecr
registry_type=ecr
base_image_name=aws-do-inference-base
base_image_tag=:v9-${processor}
model_image_name=${huggingface_model_name}
model_image_tag=:v9-${processor}

# Trace settings
# trace_opts_$processor is a processor-specific setting used by the docker run command in the trace.sh script
# The matching value is selected automatically based on your processor setting above
trace_opts_cpu=""
trace_opts_gpu="--gpus 0"
trace_opts_inf1="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
trace_opts_inf2="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
trace_opts_arm=""
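
# A minimal sketch of how trace.sh may select the processor-specific options
# above, using bash indirect expansion. This is an assumption for illustration
# (the actual trace.sh may differ, and the config filename is assumed), so it
# is kept commented out here:
#
#   source ./config.properties
#   opts_var="trace_opts_${processor}"   # e.g. trace_opts_gpu when processor=gpu
#   trace_opts="${!opts_var}"            # indirect expansion resolves the value
#   docker run ${trace_opts} ${registry}${base_image_name}${base_image_tag}
#
# With processor=gpu this resolves trace_opts to "--gpus 0".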
# Deployment settings
# Some of these settings apply only when the runtime is kubernetes
# runtime = docker | kubernetes
runtime=kubernetes
# number of models per model server
num_models=15
# quiet = False | True - sets whether the model server should print logs
quiet=False
# postprocess = True | False - sets whether tensors returned from the model should be translated back to text or returned as-is
postprocess=True
# service_port - port on which the model service will be exposed
service_port=8080

# Kubernetes-specific deployment settings
# instance_type = c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge | inf1.6xlarge | ...
# A node group with the specified instance_type must exist in the cluster
# The instance type must have the processor configured above
# Example: processor=arm, instance_type=c7g.4xlarge
instance_type=c5.4xlarge
# num_servers - number of model servers to deploy
# Note that more than one model server can run on a node with multiple cpu/gpu/inferentia chips.
# Example: 4 model servers fit on one inf1.6xlarge instance as it has 4 Inferentia chips.
num_servers=1

# Kubernetes namespace
namespace=mpi

# Kubernetes app name
app_name=${huggingface_model_name}-${processor}
app_dir=app-${app_name}-${instance_type}

# Test settings
test_image_name=test-${huggingface_model_name}
test_image_tag=:v9-cpu
# request_frequency - time in seconds to sleep between two consecutive requests in curl tests
request_frequency=0.01
# Stop the random request test after num_requests requests
num_requests=30
# Number of test containers to launch (default=1), use > 1 for scale testing
num_test_containers=1
# test_instance_type - when runtime is kubernetes, instance type on which test pods will run
test_instance_type=c5.4xlarge
# test_namespace - when runtime is kubernetes, namespace where test pods will be created
test_namespace=mpi
# test_dir - when runtime is kubernetes, directory where test pod manifests are stored
test_dir=app-${test_image_name}-${instance_type}
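
# A hedged usage sketch showing how a build/push script might compose the full
# model image URI and log in to ECR from the settings above. The composition
# and script steps are illustrative assumptions, not taken from this file;
# aws ecr get-login-password and docker login are standard CLI calls. Kept
# commented out so sourcing this config has no side effects:
#
#   source ./config.properties                        # assumed filename
#   model_image="${registry}${model_image_name}${model_image_tag}"
#   # e.g. <account>.dkr.ecr.us-east-1.amazonaws.com/bert-base-multilingual-cased:v9-cpu
#   if [ "${registry_type}" == "ecr" ]; then
#     aws ecr get-login-password --region ${region} \
#       | docker login --username AWS --password-stdin ${registry%/}
#   fi
#   docker push ${model_image}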