"""Evaluation script for measuring model accuracy.""" import json import os import tarfile import logging import pickle import pandas as pd import xgboost logger = logging.getLogger() logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) # May need to import additional metrics depending on what you are measuring. # See https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html from sklearn.metrics import classification_report, roc_auc_score, accuracy_score def get_dataset(dir_path, dataset_name) -> pd.DataFrame: files = [ os.path.join(dir_path, file) for file in os.listdir(dir_path) ] if len(files) == 0: raise ValueError(('There are no files in {}.\n' + 'This usually indicates that the channel ({}) was incorrectly specified,\n' + 'the data specification in S3 was incorrectly specified or the role specified\n' + 'does not have permission to access the data.').format(files, dataset_name)) raw_data = [ pd.read_csv(file, header=None) for file in files ] df = pd.concat(raw_data) return df if __name__ == "__main__": model_path = "/opt/ml/processing/model/model.tar.gz" with tarfile.open(model_path) as tar: tar.extractall(path="..") logger.debug("Loading xgboost model.") model = pickle.load(open("xgboost-model", "rb")) logger.info("Loading test input data") test_path = "/opt/ml/processing/test" df = get_dataset(test_path, "test_set") logger.debug("Reading test data.") y_test = df.iloc[:, 0].to_numpy() df.drop(df.columns[0], axis=1, inplace=True) X_test = xgboost.DMatrix(df.values) logger.info("Performing predictions against test data.") predictions_probs = model.predict(X_test) predictions = predictions_probs.round() logger.info("Creating classification evaluation report") acc = accuracy_score(y_test, predictions) auc = roc_auc_score(y_test, predictions_probs) # The metrics reported can change based on the model used, but it must be a specific name per (https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html) report_dict = { "binary_classification_metrics": { "accuracy": { "value": acc, "standard_deviation": "NaN", }, "auc": {"value": auc, "standard_deviation": "NaN"}, }, } logger.info("Classification report:\n{}".format(report_dict)) evaluation_output_path = os.path.join( "/opt/ml/processing/evaluation", "evaluation.json" ) logger.info("Saving classification report to {}".format(evaluation_output_path)) with open(evaluation_output_path, "w") as f: f.write(json.dumps(report_dict))