In [None]:
# Upgrade packages and install libsndfile1
!sudo yum upgrade -y
!sudo yum install libsndfile1 -y

In [None]:
# Install librosa (if you want to process mp3) and upgrade sagemaker
!pip install librosa
!pip install sagemaker --upgrade

In [None]:
import sagemaker.huggingface

# Resolve the execution role and the session used for the rest of the notebook.
ROLE = sagemaker.get_execution_role()
sess = sagemaker.Session()

# The session's default bucket is used here; to use your own bucket instead,
# assign its name: BUCKET = "[BUCKET_NAME]"
BUCKET = sess.default_bucket()

# Report the context this notebook is running with, one item per line.
print(
    f"sagemaker role arn: {ROLE}",
    f"sagemaker bucket: {BUCKET}",
    f"sagemaker session region: {sess.boto_region_name}",
    sep="\n",
)

In [None]:
# A SageMaker Model bundles the running environment (container), the inference script,
# and the model data. A SageMaker Endpoint is a running cluster serving SageMaker Models.

from sagemaker.huggingface import HuggingFaceModel

# Set model_name to a new name of your choice, and model_data to the S3 location of
# the model artifact produced by the training job.
model_name = 'YOUR MODEL NAME'
model_data = 'COPY FROM THE TRAINING JOB S3 MODEL ARTIFACT'
endpoint_name = 'Whisper-zhtw'

huggingface_model = HuggingFaceModel(
    name=model_name,
    role=ROLE,
    model_data=model_data,
    # Custom inference handler and the directory that contains it.
    source_dir='./scripts',
    entry_point='inference.py',
    # Framework versions requested for the serving container.
    # NOTE(review): the endpoint name suggests a Whisper model; confirm that this
    # transformers version (or a requirements.txt in ./scripts) supports it.
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
)



In [None]:
# Creating the model takes a lot of temporary space under the root volume. For large models,
# the root disk of a SageMaker Studio notebook (which is a container) may not be enough.
# This notebook only invokes SageMaker APIs, so it can be run from an EC2 instance as well.

# To create a new model and deploy it as a new endpoint, the easiest way is to call
# the deploy method on the model itself.
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    instance_type='ml.m5d.2xlarge',
    initial_instance_count=1,
)

# To update an existing endpoint with a new model instead, register the model first
# and then point the existing endpoint at it:
#
# from sagemaker.predictor import Predictor
# from sagemaker.serializers import DataSerializer
# from sagemaker.deserializers import JSONDeserializer
#
# sess.create_model(
#     model_name,
#     ROLE,
#     huggingface_model.prepare_container_def(
#         instance_type='ml.m5d.2xlarge'
#     )
# )
#
# audio_serializer = DataSerializer(content_type='audio/x-audio')
#
# predictor = Predictor(endpoint_name, serializer=audio_serializer, deserializer=JSONDeserializer())
# predictor.update_endpoint(model_name=model_name, initial_instance_count=1, instance_type='ml.m5d.2xlarge')

In [None]:
%%time
# Do the prediction
from sagemaker.predictor import Predictor
from sagemaker.serializers import DataSerializer

audio_path = 'test-audio.flac'
audio_serializer = DataSerializer(content_type='audio/x-audio')

predictor = Predictor(endpoint_name, serializer=audio_serializer)
with open(audio_path, "rb") as data_file:
    audio_data = data_file.read()
    
prediction = predictor.predict(audio_data)
print(prediction)

In [None]:
# Show the transcription in readable form.
# The response is assumed to be a JSON document (TODO confirm against scripts/inference.py),
# so it is parsed instead of decoded with 'unicode_escape': that codec interprets the bytes
# as Latin-1, which garbles any raw UTF-8 text (e.g. Chinese characters sent without
# \u escapes). json.loads handles both escaped and raw UTF-8 payloads.
import json

try:
    transcription = json.loads(prediction)
except ValueError:
    # Not JSON after all: fall back to showing the payload as plain UTF-8 text.
    transcription = prediction.decode('utf-8')
transcription