"""Evaluation script for measuring model accuracy.""" import json import logging import pathlib import pickle import tarfile import numpy as np import pandas as pd import xgboost from sklearn.metrics import ( accuracy_score, precision_score, recall_score, confusion_matrix, roc_curve, ) logger = logging.getLogger() logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) if __name__ == "__main__": model_path = "/opt/ml/processing/model/model.tar.gz" with tarfile.open(model_path) as tar: import os def is_within_directory(directory, target): abs_directory = os.path.abspath(directory) abs_target = os.path.abspath(target) prefix = os.path.commonprefix([abs_directory, abs_target]) return prefix == abs_directory def safe_extract(tar, path=".", members=None, *, numeric_owner=False): for member in tar.getmembers(): member_path = os.path.join(path, member.name) if not is_within_directory(path, member_path): raise Exception("Attempted Path Traversal in Tar File") tar.extractall(path, members, numeric_owner=numeric_owner) safe_extract(tar, path="..") logger.debug("Loading xgboost model.") model = pickle.load(open("xgboost-model", "rb")) logger.debug("Loading test input data.") test_path = "/opt/ml/processing/test/test.csv" df = pd.read_csv(test_path, header=None) logger.debug("Reading test data.") y_test = df.iloc[:, 0].to_numpy() df.drop(df.columns[0], axis=1, inplace=True) X_test = xgboost.DMatrix(df.values) logger.info("Performing predictions against test data.") prediction_probabilities = model.predict(X_test) predictions = np.round(prediction_probabilities) precision = precision_score(y_test, predictions) recall = recall_score(y_test, predictions) accuracy = accuracy_score(y_test, predictions) conf_matrix = confusion_matrix(y_test, predictions) fpr, tpr, _ = roc_curve(y_test, prediction_probabilities) logger.debug("Accuracy: {}".format(accuracy)) logger.debug("Precision: {}".format(precision)) logger.debug("Recall: {}".format(recall)) logger.debug("Confusion matrix: {}".format(conf_matrix)) # Available metrics to add to model: https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html report_dict = { "binary_classification_metrics": { "accuracy": {"value": accuracy, "standard_deviation": "NaN"}, "precision": {"value": precision, "standard_deviation": "NaN"}, "recall": {"value": recall, "standard_deviation": "NaN"}, "confusion_matrix": { "0": {"0": int(conf_matrix[0][0]), "1": int(conf_matrix[0][1])}, "1": {"0": int(conf_matrix[1][0]), "1": int(conf_matrix[1][1])}, }, "receiver_operating_characteristic_curve": { "false_positive_rates": list(fpr), "true_positive_rates": list(tpr), }, }, } output_dir = "/opt/ml/processing/evaluation" pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) evaluation_path = f"{output_dir}/evaluation.json" with open(evaluation_path, "w") as f: f.write(json.dumps(report_dict))