# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
"""Evaluate a trained MXNet image classifier against a RecordIO test set.

The script unpacks ``model.tar.gz``, loads the checkpoint it contains, runs
every record of ``test.rec`` through the model one at a time, and writes
accuracy plus weighted precision/recall/F1 to ``evaluation_report.json``.
"""
import sys
import os
import subprocess
import argparse
import json
import warnings
import logging
from glob import glob
from datetime import datetime
import tarfile
from glob import glob
import re

# Install packages prior to executing the rest of the script. You can also
# build your own custom container with your individual dependencies if needed.
subprocess.check_call([sys.executable, "-m", "pip", "install", "mxnet", "opencv-python"])

# System libraries are installed on a best-effort basis: a failure is reported
# but does not abort the job.
for _bootstrap_cmd in ("apt-get update", "apt-get install ffmpeg libsm6 libxext6 -y"):
    if os.system(_bootstrap_cmd) != 0:
        print("WARNING: command failed (continuing): %s" % _bootstrap_cmd, file=sys.stderr)

import pandas as pd
import numpy as np
from PIL import Image

import mxnet as mx
import mxnet.ndarray as nd
from mxnet import nd, gluon
from mxnet.gluon.data.vision import transforms

from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Constants
# Class names in label-index order: index 0 -> 'good', index 1 -> 'bad'.
CLASS_LABELS = ['good', 'bad']

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())

ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()


def _first_match(directory: str, pattern: str) -> str:
    """Return the first file in *directory* matching the glob *pattern*.

    Matches are sorted so the choice is deterministic when several files match.

    Raises:
        FileNotFoundError: if nothing matches.
    """
    matches = sorted(glob(os.path.join(directory, pattern)))
    if not matches:
        raise FileNotFoundError(
            "No file matching %r found in %r" % (pattern, directory)
        )
    return matches[0]


def _require_match(pattern: str, text: str, what: str) -> str:
    """Return the text matched by regex *pattern* in *text*.

    Raises:
        ValueError: if the pattern does not match; *what* names the missing piece.
    """
    found = re.search(pattern, text)
    if found is None:
        raise ValueError("Could not extract %s from %r" % (what, text))
    return found.group(0)


def _build_evaluation_report(y_true, y_pred) -> dict:
    """Assemble the metrics dictionary written to ``evaluation_report.json``.

    Args:
        y_true: ground-truth class indices.
        y_pred: predicted class indices, aligned with *y_true*.

    Returns:
        A dict with accuracy and weighted recall/precision/F1 under
        ``multiclass_classification_metrics`` and the full sklearn report
        under ``classification_report``.
    """
    # Passing `labels` explicitly keeps the report aligned with CLASS_LABELS
    # even when one of the classes is absent from both y_true and y_pred;
    # without it sklearn raises because the label count no longer matches
    # the number of target names.
    clf_report = classification_report(
        y_true,
        y_pred,
        labels=list(range(len(CLASS_LABELS))),
        target_names=CLASS_LABELS,
        output_dict=True,
    )
    accuracy = accuracy_score(y_true, y_pred)
    weighted = clf_report['weighted avg']

    return {
        'multiclass_classification_metrics': {
            'accuracy': {
                'value': accuracy,
                'standard_deviation': 'NaN'
            },
            'weighted_recall': {
                'value': weighted['recall'],
                'standard_deviation': 'NaN'
            },
            'weighted_precision': {
                'value': weighted['precision'],
                'standard_deviation': 'NaN'
            },
            'weighted_f1': {
                'value': weighted['f1-score'],
                'standard_deviation': 'NaN'
            }
        },
        'classification_report': clf_report
    }


def main() -> None:
    """Run the evaluation end to end: load model, predict, write the report."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--image-width', type=int, default=224)
    parser.add_argument('--image-height', type=int, default=224)
    args, _ = parser.parse_known_args()

    logger.info('Received arguments {}'.format(args))

    # Define the paths
    test_data_base_path = '/opt/ml/processing/test'
    model_data_base_path = '/opt/ml/processing/model'
    report_output_base_path = '/opt/ml/processing/report'

    image_width = int(args.image_width)
    image_height = int(args.image_height)
    # MXNet image tensors are laid out as (channels, height, width).
    data_shape = (3, image_height, image_width)

    # Unzipping the model
    model_filename = 'model.tar.gz'
    model_path = os.path.join(model_data_base_path, model_filename)
    model_path_extracted = './model/'
    # NOTE(review): extractall() trusts the member paths inside the archive;
    # this assumes the model artifact comes from a trusted training job.
    with tarfile.open(model_path) as tar:
        tar.extractall(path=model_path_extracted)

    # Get the files needed for loading, parse some strings
    symbol_file = _first_match(model_path_extracted, '*symbol.json')
    params_file = _first_match(model_path_extracted, '*.params')
    logger.info('Symbol file: %s', symbol_file)
    logger.info('Params file: %s', params_file)

    # Extract name and epoch needed for loading
    model_name = _require_match(
        r".+(?=-symbol\.json)", os.path.basename(symbol_file), 'the model name'
    )
    epoch = int(_require_match(
        r"[0-9]+(?=\.params)", os.path.basename(params_file), 'the epoch number'
    ))

    # Loading model
    logger.info('Loading model from artifacts...')
    sym, arg_params, aux_params = mx.model.load_checkpoint(
        os.path.join(model_path_extracted, model_name), epoch
    )
    model = mx.mod.Module(symbol=sym, context=mx.cpu(), label_names=['data'])
    model.bind(
        for_training=False,
        data_shapes=[('data', (1,) + data_shape)],
        label_shapes=model._label_shapes,
    )
    model.set_params(arg_params, aux_params, allow_missing=True)

    # Load test data into record iterator (batch size 1). Shuffling is not
    # needed for evaluation: the metrics do not depend on sample order.
    test_data = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(test_data_base_path, 'test.rec'),
        data_shape=data_shape,
        batch_size=1,
        shuffle=False,
    )

    # Lists for the predicted and true labels
    y_true = []
    y_pred = []

    # For each batch (size=1) predict the class
    # TODO: make batch prediction work
    for batch in test_data:
        res = model.predict(eval_data=batch.data[0])
        # Convert to NumPy first so argmax yields a plain integer index.
        y_pred.append(int(np.argmax(res[0].asnumpy())))
        # Index the single label explicitly instead of converting a
        # 1-element array to int.
        y_true.append(int(batch.label[0].asnumpy()[0]))

    if not y_true:
        raise RuntimeError(
            'No test samples were read from %s'
            % os.path.join(test_data_base_path, 'test.rec')
        )

    # Save the evaluation report to make information available to downstream steps
    evaluation_report = _build_evaluation_report(y_true, y_pred)

    print('Evaluation report:', evaluation_report)

    os.makedirs(report_output_base_path, exist_ok=True)
    report_output_path = os.path.join(report_output_base_path, 'evaluation_report.json')
    with open(report_output_path, "w") as f:
        json.dump(evaluation_report, f)


if __name__ == '__main__':
    main()