# Model Deployment 
* Container: codna_pytorch_py39

## AutoReload

In [1]:
%load_ext autoreload
%autoreload 2

## 1. parameter store 설정

In [2]:
import boto3
from utils.ssm import parameter_store

In [3]:
strRegionName=boto3.Session().region_name
pm = parameter_store(strRegionName)
prefix = pm.get_params(key="PREFIX")

## 2.package import for model deployment

In [4]:
import os
import sagemaker
from sagemaker.pytorch.model import PyTorchModel

In [5]:
from time import strftime
from smexperiments.trial import Trial
from smexperiments.experiment import Experiment

In [6]:
def create_experiment(experiment_name):
 try:
 sm_experiment = Experiment.load(experiment_name)
 except:
 sm_experiment = Experiment.create(experiment_name=experiment_name)

In [7]:
def create_trial(experiment_name):
 create_date = strftime("%m%d-%H%M%s")
 sm_trial = Trial.create(trial_name=f'{experiment_name}-{create_date}',
 experiment_name=experiment_name)

 job_name = f'{sm_trial.trial_name}'
 return job_name

In [8]:
local_mode = True

if local_mode: 
 inference_instance_type = 'local_gpu'
 
 import os
 from sagemaker.local import LocalSession
 
 sagemaker_session = LocalSession()
 sagemaker_session.config = {'local': {'local_code': True}}
 
else:
 inference_instance_type = "ml.g4dn.xlarge"
 sagemaker_session = sagemaker.Session()
 


sagemaker_role_arn = pm.get_params(key=prefix + '-SAGEMAKER-ROLE-ARN') 
bucket_name = pm.get_params(key=prefix + '-BUCKET')
model_artifact_s3_uri = pm.get_params(key=prefix + '-MODEL-PATH')
inf_image_uri = pm.get_params(key=''.join([prefix, '-INF-IMAGE-URI']))

code_location= os.path.join(
 "s3://{}".format(bucket_name),
 prefix,
 "inference",
 "backup_codes"
)

monitor_output= os.path.join(
 "s3://{}".format(bucket_name),
 prefix,
 "inference",
 "monitor_output"
)

git_config = {
 'repo': f'https://{pm.get_params(key="-".join([prefix, "CODE_REPO"]))}',
 'branch': 'main',
 'username': pm.get_params(key="-".join([prefix, "CODECOMMIT-USERNAME"]), enc=True),
 'password': pm.get_params(key="-".join([prefix, "CODECOMMIT-PWD"]), enc=True)
} 

print (f"sagemaker_role_arn: {sagemaker_role_arn}")
print (f"model_artifact_s3_uri: {model_artifact_s3_uri}")
print (f"inf_image_uri: {inf_image_uri}")
print (f"code_location: {code_location}")
print (f"monitor_output: {monitor_output}")
print (f"git_config: {git_config}")

sagemaker_role_arn: arn:aws:iam::419974056037:role/service-role/AmazonSageMaker-ExecutionRole-20221206T163436
model_artifact_s3_uri: s3://sm-nemo-ramp/nemo-asr/training/model-output/nemo-asr-nemo-experiments-0322-10521679482352/output/model.tar.gz
inf_image_uri: 419974056037.dkr.ecr.us-east-1.amazonaws.com/nemo-test-inference
code_location: s3://sm-nemo-ramp/nemo-asr/inference/backup_codes
monitor_output: s3://sm-nemo-ramp/nemo-asr/inference/monitor_output
git_config: {'repo': 'https://git-codecommit.us-east-1.amazonaws.com/v1/repos/nemo-code', 'branch': 'main', 'username': 'dongjin-at-419974056037', 'password': 'wtLv/fP4ESjBDnyW5xgqFPGR0dMTIyK5/8gK6IS1Zsg='}


* Define inference job

In [10]:
model = PyTorchModel(
 entry_point='predictor.py',
 source_dir='./code',
 git_config=git_config,
 code_location=code_location,
 model_data=model_artifact_s3_uri,
 role=sagemaker_role_arn,
 image_uri=inf_image_uri,
 # framework_version="1.13.1",
 # py_version="py39",
 sagemaker_session=sagemaker_session
)

Cloning into '/tmp/tmpz7ocy0g2'...
remote: Counting objects: 20, done. 


Your branch is up to date with 'origin/main'.


Already on 'main'


In [11]:
if local_mode: 
 data_capture_config = None
else:
 from sagemaker.model_monitor import DataCaptureConfig

 data_capture_config = DataCaptureConfig(
 enable_capture=True,
 sampling_percentage=100,
 destination_s3_uri=monitor_output
 )

In [12]:
experiment_name = pm.get_params(key=prefix + "-EXPERI-NAME")
create_experiment(experiment_name)
job_name = create_trial(experiment_name)


predictor = model.deploy(
 initial_instance_count=1,
 instance_type=inference_instance_type,
 data_capture_config=data_capture_config,
 endpoint_name=job_name,
 experiment_config={
 'TrialName': job_name,
 'TrialComponentDisplayName': job_name,
 }
)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker:Creating model with name: nemo-test-inference-2023-03-22-11-38-32-323
INFO:sagemaker:Creating endpoint-config with name nemo-asr-nemo-experiments-0322-11371679485066
INFO:sagemaker:Creating endpoint with name nemo-asr-nemo-experiments-0322-11371679485066
INFO:sagemaker.local.image:serving
INFO:sagemaker.local.image:creating hosting dir in /tmp/tmpkdalsucj
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
 sagemaker-local:
 name: sagemaker-local
services:
 algo-1-62uxc:
 c

Attaching to w00uaj7h7n-algo-1-62uxc
[36mw00uaj7h7n-algo-1-62uxc |[0m ['torchserve', '--start', '--model-store', '/.sagemaker/ts/models', '--ts-config', '/etc/sagemaker-ts.properties', '--log-config', '/opt/conda/lib/python3.9/site-packages/sagemaker_pytorch_serving_container/etc/log4j2.xml', '--models', 'model=/opt/ml/model']
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:38:44,658 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Initializing plugins manager...
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:38:44,763 [INFO ] main org.pytorch.serve.ModelServer - 
[36mw00uaj7h7n-algo-1-62uxc |[0m Torchserve version: 0.7.0
[36mw00uaj7h7n-algo-1-62uxc |[0m TS Home: /opt/conda/lib/python3.9/site-packages
[36mw00uaj7h7n-algo-1-62uxc |[0m Current directory: /
[36mw00uaj7h7n-algo-1-62uxc |[0m Temp directory: /home/model-server/tmp
[36mw00uaj7h7n-algo-1-62uxc |[0m Metrics config path: /opt/conda/lib/python3.9/site-packages/ts/configs/metrics.yaml
[36mw00uaj7h7n-

INFO:sagemaker.local.entities:Checking if serving container is up, attempt: 10


[36mw00uaj7h7n-algo-1-62uxc |[0m Model server started.
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:38:45,226 [WARN ] pool-3-thread-1 org.pytorch.serve.metrics.MetricCollector - worker pid is not available yet.
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:38:45,408 [INFO ] pool-2-thread-2 ACCESS_LOG - /172.19.0.1:42912 "GET /ping HTTP/1.1" 200 45
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:38:45,412 [INFO ] pool-2-thread-2 TS_METRICS - Requests2XX.Count:1|#Level:Host|#hostname:226f672518e1,timestamp:1679485125
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:38:45,863 [INFO ] pool-3-thread-1 TS_METRICS - CPUUtilization.Percent:0.0|#Level:Host|#hostname:226f672518e1,timestamp:1679485125
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:38:45,864 [INFO ] pool-3-thread-1 TS_METRICS - DiskAvailable.Gigabytes:445.66086196899414|#Level:Host|#hostname:226f672518e1,timestamp:1679485125
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:38:45,864 [INFO ] pool-3-thread-1 TS_METRICS -

In [13]:
paths2audio_files = f"{os.getcwd()}/data/preprocessing/an4/wav/an4test_clstk/fcaw/an406-fcaw-b.wav"
paths2audio_files

'/home/ec2-user/SageMaker/nemo-on-sagemaker/1.building-component/data/preprocessing/an4/wav/an4test_clstk/fcaw/an406-fcaw-b.wav'

In [14]:
import librosa
import IPython.display as ipd

# Load and listen to the audio file
audio, sample_rate = librosa.load(paths2audio_files)

ipd.Audio(paths2audio_files, rate=sample_rate)

[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:45,763 [INFO ] pool-3-thread-2 TS_METRICS - CPUUtilization.Percent:50.0|#Level:Host|#hostname:226f672518e1,timestamp:1679485305
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:45,765 [INFO ] pool-3-thread-2 TS_METRICS - DiskAvailable.Gigabytes:445.66065979003906|#Level:Host|#hostname:226f672518e1,timestamp:1679485305
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:45,765 [INFO ] pool-3-thread-2 TS_METRICS - DiskUsage.Gigabytes:32.49269104003906|#Level:Host|#hostname:226f672518e1,timestamp:1679485305
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:45,766 [INFO ] pool-3-thread-2 TS_METRICS - DiskUtilization.Percent:6.8|#Level:Host|#hostname:226f672518e1,timestamp:1679485305
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:45,766 [INFO ] pool-3-thread-2 TS_METRICS - GPUMemoryUtilization.Percent:5.364990234375|#Level:Host,device_id:0|#hostname:226f672518e1,timestamp:1679485305
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T1

In [15]:
from sagemaker.predictor import Predictor

In [None]:
# predictor = Predictor('nemo-cyj-nemo-experiments-0322-10501679482211')

In [16]:
from sagemaker.serializers import DataSerializer
predictor.serializer = DataSerializer()

In [17]:
predictor.predict(paths2audio_files)

[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:56,559 [INFO ] W-9000-model_1.0 org.pytorch.serve.wlm.WorkerThread - Flushing req. to backend at: 1679485316559
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:56,562 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - Backend received inference at: 1679485316
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:56,579 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - ***************** 1input_fn ********************
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:56,579 [WARN ] W-9000-model_1.0-stderr MODEL_LOG - 
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:56,579 [INFO ] W-9000-model_1.0-stdout MODEL_LOG - ***************** predict_fn ********************
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:56,580 [WARN ] W-9000-model_1.0-stderr MODEL_LOG - 
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:41:57,849 [WARN ] W-9000-model_1.0-stderr MODEL_LOG - Transcribing: 0%| | 0/1 [00:00, ?it/s]
[36mw00uaj7h7n-algo-1-62uxc |[0m 2

array({'result': [' ']}, dtype=object)

In [18]:
pm.put_params(key="ENDPOINT-NAME", value=job_name, overwrite=True)
pm.put_params(key="MONITOR-OUTPUT", value=monitor_output, overwrite=True)

'Store suceess'

In [19]:
print (f'ENDPOINT-NAME: {pm.get_params(key="ENDPOINT-NAME")}')
print (f'MONITOR-OUTPUT: {pm.get_params(key="MONITOR-OUTPUT")}')

ENDPOINT-NAME: nemo-asr-nemo-experiments-0322-11371679485066
MONITOR-OUTPUT: s3://sm-nemo-ramp/nemo-asr/inference/monitor_output
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:42:45,754 [INFO ] pool-3-thread-2 TS_METRICS - CPUUtilization.Percent:0.0|#Level:Host|#hostname:226f672518e1,timestamp:1679485365
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:42:45,754 [INFO ] pool-3-thread-2 TS_METRICS - DiskAvailable.Gigabytes:445.9645690917969|#Level:Host|#hostname:226f672518e1,timestamp:1679485365
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:42:45,754 [INFO ] pool-3-thread-2 TS_METRICS - DiskUsage.Gigabytes:32.18878173828125|#Level:Host|#hostname:226f672518e1,timestamp:1679485365
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:42:45,755 [INFO ] pool-3-thread-2 TS_METRICS - DiskUtilization.Percent:6.7|#Level:Host|#hostname:226f672518e1,timestamp:1679485365
[36mw00uaj7h7n-algo-1-62uxc |[0m 2023-03-22T11:42:45,755 [INFO ] pool-3-thread-2 TS_METRICS - GPUMemoryUtilization.Percent:7.8

Exception in thread Thread-5:
Traceback (most recent call last):
 File "/home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/sagemaker/local/image.py", line 854, in run
 _stream_output(self.process)
 File "/home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/sagemaker/local/image.py", line 916, in _stream_output
 raise RuntimeError("Process exited with code: %s" % exit_code)
RuntimeError: Process exited with code: 137

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
 File "/home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/threading.py", line 980, in _bootstrap_inner
 self.run()
 File "/home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/sagemaker/local/image.py", line 859, in run
 raise RuntimeError(msg)
RuntimeError: Failed to run: ['docker-compose', '-f', '/tmp/tmpkdalsucj/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 137


[36mw00uaj7h7n-algo-1-62uxc exited with code 137
[0mAborting on container exit...


In [None]:
# predictor.delete_endpoint()