{
"cells": [
{
"cell_type": "markdown",
"id": "cb60aea8-54a9-4cf8-9cff-5f9b2043036b",
"metadata": {},
"source": [
"# Evaluation \n",
"* Container: conda_pytorch_p39"
]
},
{
"cell_type": "markdown",
"id": "6dc511d5-62aa-404b-9fbd-0f1a59446926",
"metadata": {},
"source": [
"## AutoReload"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "51b06f10-985d-4077-b729-e484c814f6b0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"id": "84ecd6e6-2939-4712-b342-136d077100a9",
"metadata": {},
"source": [
"## 1. parameter store 설정"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "0ba446e0-448f-4e07-9d5a-225c16bde8cb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import boto3\n",
"from utils.ssm import parameter_store"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "22a09c09-735c-416e-b229-2d0a84bd458a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Open the project's parameter-store helper in the region of the default boto3 session.\n",
"strRegionName = boto3.Session().region_name\n",
"pm = parameter_store(strRegionName)\n",
"\n",
"# The value stored under PREFIX namespaces every other parameter key read below.\n",
"prefix = pm.get_params(key=\"PREFIX\")"
]
},
{
"cell_type": "markdown",
"id": "82fcf66a-7690-4ee7-bc5c-ecab6d254e84",
"metadata": {},
"source": [
"## 2. Processing job for evaluation"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "31244e49-1ef6-44b0-aa9a-8c279e3e9093",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"import sagemaker\n",
"from sagemaker.pytorch.estimator import PyTorch\n",
"from sagemaker.workflow.execution_variables import ExecutionVariables\n",
"from sagemaker.processing import ProcessingInput, ProcessingOutput, FrameworkProcessor"
]
},
{
"cell_type": "markdown",
"id": "ca29aeed-2e97-46eb-9b8f-9a6d7e705057",
"metadata": {
"tags": []
},
"source": [
"* **Set Up SageMaker Experiment**\n",
"    - Create or load a [SageMaker Experiment](https://docs.aws.amazon.com/sagemaker/latest/dg/experiments.html) for the evaluation job, then create a trial object in that experiment. The trial name is reused as the processing job name."
]
},
{
"cell_type": "markdown",
"id": "fef44376-e6ac-43c2-b9e4-e18adb65a2ec",
"metadata": {},
"source": [
"* params for processing job"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "addb70d1-0a55-4f86-ae53-9beec538f023",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from time import strftime\n",
"from smexperiments.trial import Trial\n",
"from smexperiments.experiment import Experiment"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "58fff36e-b3aa-4684-a67a-fde9ae3b11ad",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def create_experiment(experiment_name):\n",
"    \"\"\"Load the SageMaker Experiment called `experiment_name`, creating it if absent.\n",
"\n",
"    Args:\n",
"        experiment_name: Name of the experiment to load or create.\n",
"\n",
"    Returns:\n",
"        The loaded or newly created `Experiment` object.\n",
"\n",
"    Raises:\n",
"        Exception: Any load failure other than the experiment not existing is\n",
"            re-raised unchanged.\n",
"    \"\"\"\n",
"    try:\n",
"        return Experiment.load(experiment_name)\n",
"    except Exception as err:\n",
"        # A missing experiment is reported with the ResourceNotFound error code.\n",
"        # Anything else (permissions, throttling, ...) must surface instead of\n",
"        # being masked by a create attempt.\n",
"        if \"ResourceNotFound\" not in str(err):\n",
"            raise\n",
"        return Experiment.create(experiment_name=experiment_name)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5c43996d-e5f7-4fbd-992b-36f732f18e4f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def create_trial(experiment_name):\n",
"    \"\"\"Create a new trial under `experiment_name` and return its name.\n",
"\n",
"    The trial is named `<experiment_name>-<MMDD>-<HHMMSS>` using the local time\n",
"    at which this function is called.\n",
"\n",
"    Args:\n",
"        experiment_name: Name of the SageMaker Experiment that owns the trial.\n",
"\n",
"    Returns:\n",
"        The name of the created trial.\n",
"    \"\"\"\n",
"    # Use %S (seconds, 00-59). Lowercase %s is not a documented strftime\n",
"    # directive and its result depends on the platform's C library.\n",
"    create_date = strftime(\"%m%d-%H%M%S\")\n",
"    sm_trial = Trial.create(\n",
"        trial_name=f'{experiment_name}-{create_date}',\n",
"        experiment_name=experiment_name,\n",
"    )\n",
"    return sm_trial.trial_name"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4e5789a5-57b6-4a7e-81bc-188e23c28b06",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"instance-type: local_gpu\n",
"image-uri: 419974056037.dkr.ecr.us-east-1.amazonaws.com/nemo-test-training\n",
"role: arn:aws:iam::419974056037:role/service-role/AmazonSageMaker-ExecutionRole-20221206T163436\n",
"bucket: sm-nemo-ramp\n",
"preprocessing-data-path: s3://sm-nemo-ramp/nemo-asr/preprocessing/data\n",
"model_artifact_s3_uri: s3://sm-nemo-ramp/nemo-asr/training/model-output/nemo-asr-nemo-experiments-0322-10521679482352/output/model.tar.gz\n",
"test_manifest_s3_path: s3://sm-nemo-ramp/nemo-asr/preprocessing/data/an4/test_manifest.json\n",
"wav_s3_uri: s3://sm-nemo-ramp/nemo-asr/preprocessing/data/an4/wav\n",
"eval_output_s3_uri: s3://sm-nemo-ramp/evaluation/output/eval_output\n",
"sagemaker_session: \n",
"git_config: {'repo': 'https://git-codecommit.us-east-1.amazonaws.com/v1/repos/nemo-code', 'branch': 'main', 'username': '***', 'password': '***'}\n"
]
}
],
"source": [
"local_mode = True\n",
"\n",
"# Pick the execution backend: a local GPU container or a managed instance.\n",
"if local_mode:\n",
"    from sagemaker.local import LocalSession\n",
"\n",
"    instance_type = 'local_gpu'\n",
"    sagemaker_session = LocalSession()\n",
"    sagemaker_session.config = {'local': {'local_code': True}}\n",
"else:\n",
"    instance_type = \"ml.g4dn.xlarge\"\n",
"    sagemaker_session = sagemaker.Session()\n",
"\n",
"# Shared settings are read from the parameter store, namespaced by `prefix`.\n",
"sagemaker_role_arn = pm.get_params(key=prefix + '-SAGEMAKER-ROLE-ARN')\n",
"preprocessing_data_path = pm.get_params(key=prefix + '-PREP-DATA-PATH')\n",
"bucket_name = pm.get_params(key=prefix + '-BUCKET')\n",
"eval_image_uri = pm.get_params(key=''.join([prefix, '-IMAGE-URI']))\n",
"model_artifact_s3_uri = pm.get_params(key=prefix + '-MODEL-PATH')\n",
"\n",
"# Derived S3 locations. These are built only after `preprocessing_data_path`\n",
"# has been read, so the same code path works for both values of `local_mode`.\n",
"test_manifest_s3_path = os.path.join(preprocessing_data_path, \"an4\", \"test_manifest.json\")\n",
"wav_s3_uri = os.path.join(preprocessing_data_path, \"an4\", \"wav\")\n",
"eval_output_s3_uri = os.path.join(\"s3://\", bucket_name, \"evaluation\", \"output\", \"eval_output\")\n",
"\n",
"code_location = os.path.join(\n",
"    \"s3://{}\".format(bucket_name),\n",
"    prefix,\n",
"    \"evaluation\",\n",
"    \"backup_codes\"\n",
")\n",
"\n",
"git_config = {\n",
"    'repo': f'https://{pm.get_params(key=\"-\".join([prefix, \"CODE_REPO\"]))}',\n",
"    'branch': 'main',\n",
"    'username': pm.get_params(key=\"-\".join([prefix, \"CODECOMMIT-USERNAME\"]), enc=True),\n",
"    'password': pm.get_params(key=\"-\".join([prefix, \"CODECOMMIT-PWD\"]), enc=True)\n",
"}\n",
"\n",
"# Never echo the repository credentials: cell output is saved with the notebook.\n",
"masked_git_config = {**git_config, 'username': '***', 'password': '***'}\n",
"\n",
"print(f\"instance-type: {instance_type}\")\n",
"print(f\"image-uri: {eval_image_uri}\")\n",
"print(f\"role: {sagemaker_role_arn}\")\n",
"print(f\"bucket: {bucket_name}\")\n",
"print(f\"preprocessing-data-path: {preprocessing_data_path}\")\n",
"print(f\"model_artifact_s3_uri: {model_artifact_s3_uri}\")\n",
"print(f\"test_manifest_s3_path: {test_manifest_s3_path}\")\n",
"print(f\"wav_s3_uri: {wav_s3_uri}\")\n",
"print(f\"eval_output_s3_uri: {eval_output_s3_uri}\")\n",
"print(f\"sagemaker_session: {sagemaker_session}\")\n",
"print(f\"git_config: {masked_git_config}\")"
]
},
{
"cell_type": "markdown",
"id": "5b1d1a28-03f1-4f00-ad79-5786b15b0d46",
"metadata": {},
"source": [
"* Define processing job"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "f81731ba-391a-411e-b30b-56b3d4cabf61",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Processor that runs the evaluation script in the evaluation image.\n",
"eval_processor = FrameworkProcessor(\n",
"    estimator_cls=PyTorch,\n",
"    framework_version=\"1.13.1\",\n",
"    image_uri=eval_image_uri,\n",
"    role=sagemaker_role_arn,\n",
"    instance_type=instance_type,\n",
"    instance_count=1,\n",
"    code_location=code_location,\n",
"    # Environment variables set for the processing container.\n",
"    # NOTE(review): MANIFEST_PATH points at a wav directory under\n",
"    # /opt/ml/input/data/testing rather than at a manifest file - confirm this\n",
"    # is what the evaluation script expects.\n",
"    env={\n",
"        'MANIFEST_PATH': '/opt/ml/input/data/testing/an4/wav',\n",
"        'WAV_PATH': '/opt/ml/processing/input/wav',\n",
"    },\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "432f4184-41c9-4104-8cb5-af5ea8581b80",
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n",
"INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n",
"INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n",
"Cloning into '/tmp/tmphqlsgmmc'...\n",
"remote: Counting objects: 20, done. \n",
"Already on 'main'\n",
"INFO:sagemaker.processing:Uploaded /tmp/tmphqlsgmmc/./code to s3://sm-nemo-ramp/nemo-asr/evaluation/backup_codes/nemo-asr-nemo-experiments-0322-11141679483673/source/sourcedir.tar.gz\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Your branch is up to date with 'origin/main'.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:sagemaker.processing:runproc.sh uploaded to s3://sm-nemo-ramp/nemo-asr/evaluation/backup_codes/nemo-asr-nemo-experiments-0322-11141679483673/source/runproc.sh\n",
"INFO:sagemaker:Creating processing-job with name nemo-asr-nemo-experiments-0322-11141679483673\n",
"WARNING:sagemaker.local.local_session:Experiment configuration is not supported in local mode.\n",
"INFO:sagemaker.local.local_session:Starting processing job\n",
"INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n",
"INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.\n",
"INFO:sagemaker.local.image:docker compose file: \n",
"networks:\n",
" sagemaker-local:\n",
" name: sagemaker-local\n",
"services:\n",
" algo-1-6lqjc:\n",
" container_name: glsnhfbk3i-algo-1-6lqjc\n",
" deploy:\n",
" resources:\n",
" reservations:\n",
" devices:\n",
" - capabilities:\n",
" - gpu\n",
" entrypoint:\n",
" - /bin/bash\n",
" - /opt/ml/processing/input/entrypoint/runproc.sh\n",
" environment:\n",
" - '[Masked]'\n",
" - '[Masked]'\n",
" image: 419974056037.dkr.ecr.us-east-1.amazonaws.com/nemo-test-training\n",
" networks:\n",
" sagemaker-local:\n",
" aliases:\n",
" - algo-1-6lqjc\n",
" stdin_open: true\n",
" tty: true\n",
" volumes:\n",
" - /tmp/tmpel5ak6zx/algo-1-6lqjc/output:/opt/ml/output\n",
" - /tmp/tmpel5ak6zx/algo-1-6lqjc/config:/opt/ml/config\n",
" - /tmp/tmpxy0g5602:/opt/ml/processing/model\n",
" - /tmp/tmpwhrlybm4:/opt/ml/processing/input/manifest\n",
" - /tmp/tmpm8awrjb3:/opt/ml/processing/input/wav\n",
" - /tmp/tmpocreuo4t:/opt/ml/processing/input/code/\n",
" - /tmp/tmpzdwlumma:/opt/ml/processing/input/entrypoint\n",
" - /tmp/tmpels1vttx/output/eval-output-data:/opt/ml/processing/evaluation\n",
" - /tmp/tmpel5ak6zx/shared:/opt/ml/shared\n",
"version: '2.3'\n",
"\n",
"INFO:sagemaker.local.image:docker command: docker-compose -f /tmp/tmpel5ak6zx/docker-compose.yaml up --build --abort-on-container-exit\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating glsnhfbk3i-algo-1-6lqjc ... \n",
"Creating glsnhfbk3i-algo-1-6lqjc ... done\n",
"Attaching to glsnhfbk3i-algo-1-6lqjc\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo W 2023-03-22 11:16:10 optimizers:66] Could not import distributed_fused_adam optimizer from Apex\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo W 2023-03-22 11:16:12 experimental:27] Module is experimental, not ready for production and is not fully supported. Use at your own risk.\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Loading nemo model.\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m checkpoint_path : trained_model/Conformer-CTC-Char1/checkpoints/Conformer-CTC-Char1--val_wer=1.0000-epoch=2-last.ckpt\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo W 2023-03-22 11:16:20 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Train config : \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m manifest_filepath: /opt/ml/input/data/training/an4/train_manifest.json\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m labels:\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - ' '\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - a\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - b\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - c\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - d\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - e\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - f\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - g\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - h\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - i\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - j\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - k\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - l\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - m\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - 'n'\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - o\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - p\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - q\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - r\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - s\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - t\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - u\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - w\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - x\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - 'y'\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - z\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - ''''\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m sample_rate: 16000\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m batch_size: 16\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m shuffle: true\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m num_workers: 8\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m pin_memory: true\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m trim_silence: false\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m max_duration: 16.7\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m min_duration: 0.1\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m is_tarred: false\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m tarred_audio_filepaths: null\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m shuffle_n: 2048\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m bucketing_strategy: synced_randomized\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m bucketing_batch_size: null\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo W 2023-03-22 11:16:20 modelPT:168] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Validation config : \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m manifest_filepath: /opt/ml/input/data/testing/an4/test_manifest.json\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m labels:\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - ' '\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - a\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - b\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - c\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - d\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - e\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - f\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - g\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - h\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - i\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - j\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - k\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - l\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - m\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - 'n'\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - o\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - p\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - q\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - r\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - s\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - t\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - u\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - w\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - x\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - 'y'\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - z\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - ''''\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m sample_rate: 16000\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m batch_size: 16\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m shuffle: false\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m num_workers: 8\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m pin_memory: true\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo W 2023-03-22 11:16:20 modelPT:174] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Test config : \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m manifest_filepath: null\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m labels:\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - ' '\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - a\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - b\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - c\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - d\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - e\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - f\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - g\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - h\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - i\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - j\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - k\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - l\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - m\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - 'n'\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - o\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - p\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - q\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - r\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - s\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - t\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - u\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - w\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - x\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - 'y'\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - z\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m - ''''\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m sample_rate: 16000\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m batch_size: 16\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m shuffle: false\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m num_workers: 8\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m pin_memory: true\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:20 features:287] PADDING: 0\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:20 ctc_models:64] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Replacing placeholder number of classes (-1) with actual number of classes - 28\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:20 conv_asr:428] num_classes of ConvASRDecoder is set to the size of the vocabulary: 28.\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Reading test data.\n",
"Reading manifest data: 130it [00:00, 128447.47it/s]\n",
"Applying change_dir: 100% 130/130 [00:00<00:00, 225220.78it/s]\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Finished processing manifest !\n",
"Writing manifest data: 100% 130/130 [00:00<00:00, 122228.09it/s]\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Finished writing manifest: /opt/ml/processing/evaluation/eval_test_manifest_processed.json\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:22 collections:193] Dataset loaded with 130 files totalling 0.10 hours\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:22 collections:194] 0 files were filtered totalling 0.00 hours\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [2023-03-22 11:16:22.761: W smdistributed/modelparallel/torch/nn/predefined_hooks.py:78] Found unsupported HuggingFace version 4.28.0.dev0 for automated tensor parallelism. HuggingFace modules will not be automatically distributed. You can use smp.tp_register_with_module API to register desired modules for tensor parallelism, or directly instantiate an smp.nn.DistributedModule. Supported HuggingFace transformers versions for automated tensor parallelism: ['4.17.0', '4.20.1', '4.21.0']\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m INFO:root:Using NamedTuple = typing._NamedTuple instead.\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [2023-03-22 11:16:22.800 ce06aba60f49:9 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [2023-03-22 11:16:22.827 ce06aba60f49:9 INFO profiler_config_parser.py:111] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1169] reference:rubout g m e f three nine\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1170] predicted: t\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : rubout g m e f three nine\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : t\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1169] reference:j p e g four\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1170] predicted: v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : j p e g four\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1169] reference:v a n e s s a\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1170] predicted: \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : v a n e s s a\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1169] reference:p i t t s b u r g h\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1170] predicted: \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : p i t t s b u r g h\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1169] reference:march seven nineteen sixty seven\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1170] predicted: n\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : march seven nineteen sixty seven\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : n\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1169] reference:rubout c b w x v four\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1170] predicted: v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : rubout c b w x v four\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1169] reference:m y e r s\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1170] predicted: \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : m y e r s\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1169] reference:p h i n n e y\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:24 wer:1170] predicted: v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : p h i n n e y\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : v\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:25 wer:1168] \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:25 wer:1169] reference:four one two two six eight four one four two\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m [NeMo I 2023-03-22 11:16:25 wer:1170] predicted: \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** reference : four one two two six eight four one four two\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m *********** predicted : \n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc |\u001b[0m Writing out evaluation report with wer: %f 0.9909443725743855\n",
"\u001b[36mglsnhfbk3i-algo-1-6lqjc exited with code 0\n",
"\u001b[0mAborting on container exit...\n",
"===== Job Complete =====\n"
]
}
],
"source": [
"experiment_name = pm.get_params(key=prefix + \"-EXPERI-NAME\")\n",
"create_experiment(experiment_name)\n",
"job_name = create_trial(experiment_name)\n",
"\n",
"eval_processor.run(\n",
" code=\"evaluate.py\",\n",
" source_dir=\"./code\",\n",
" git_config=git_config,\n",
" inputs=[\n",
" ProcessingInput(\n",
" source=model_artifact_s3_uri,\n",
" input_name=\"model_artifact\",\n",
" destination=\"/opt/ml/processing/model\"\n",
" ),\n",
" ProcessingInput(\n",
" source=test_manifest_s3_path,\n",
" input_name=\"test_manifest_file\",\n",
" destination=\"/opt/ml/processing/input/manifest\"\n",
" ),\n",
" ProcessingInput(\n",
" source=wav_s3_uri,\n",
" input_name=\"wav_dataset\",\n",
" destination=\"/opt/ml/processing/input/wav\"\n",
" ),\n",
" ],\n",
" outputs=[\n",
" ProcessingOutput(\n",
" output_name=\"eval-output-data\",\n",
" source=\"/opt/ml/processing/evaluation\",\n",
" destination=eval_output_s3_uri\n",
" \n",
" ),\n",
" ],\n",
" job_name=job_name,\n",
" experiment_config={\n",
" 'TrialName': job_name,\n",
" 'TrialComponentDisplayName': job_name,\n",
" },\n",
" wait=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "f08dc1ac-c16c-4027-9491-8da955da6ce8",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the processor object for inspection.\n",
"eval_processor"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "e3328b4c-e16d-4042-aa81-d2ac611b8efe",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Store suceess'"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Record where the evaluation report lives. The \"/\" separator is added\n",
"# explicitly so the stored value is <eval_output_s3_uri>/evaluation.json\n",
"# instead of the output prefix and the file name fused together.\n",
"pm.put_params(\n",
"    key=\"-\".join([prefix, \"EVAL-OUTPUT-PATH\"]),\n",
"    value=f\"{eval_output_s3_uri.rstrip('/')}/evaluation.json\",\n",
"    overwrite=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e49c5ec7-802d-41fb-b1df-4de1bf82534a",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'s3://sm-nemo-ramp/evaluation/output/eval_outputevaluation.json'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the EVAL-OUTPUT-PATH parameter back to see what is stored.\n",
"pm.get_params(key=f\"{prefix}-EVAL-OUTPUT-PATH\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "4052e4e4-00c5-4f89-8f0c-a6d584e024ab",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'sm-nemo-ramp'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the bucket name stored in the parameter store.\n",
"pm.get_params(key=f\"{prefix}-BUCKET\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "080810cc-3bfc-4c2b-805b-3ed28888ae70",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "conda_pytorch_p39",
"language": "python",
"name": "conda_pytorch_p39"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}