"""Evaluation script for measuring mean squared error.""" import json import logging import pathlib import pickle import tarfile import numpy as np import pandas as pd import xgboost from sklearn.metrics import mean_squared_error logger = logging.getLogger() logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) if __name__ == "__main__": logger.info("Starting evaluation.") model_path = "/opt/ml/processing/model/model.tar.gz" with tarfile.open(model_path) as tar: tar.extractall(path=".") logger.info("Loading xgboost model.") model = pickle.load(open("xgboost-model", "rb")) logger.info("Reading test data.") test_path = "/opt/ml/processing/test/test.csv" df = pd.read_csv(test_path, header=None) logger.info("Reading test data.") y_test = df.iloc[:, 0].to_numpy() df.drop(df.columns[0], axis=1, inplace=True) X_test = xgboost.DMatrix(df.values) logger.info("Performing predictions against test data.") predictions = model.predict(X_test) logger.info("Calculating mean squared error.") mse = mean_squared_error(y_test, predictions) std = np.std(y_test - predictions) report_dict = { "regression_metrics": { "mse": { "value": mse, "standard_deviation": std }, }, } output_dir = "/opt/ml/processing/evaluation" pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) logger.info("Writing out evaluation report with mse: %f and std: %f", mse, std) evaluation_path = f"{output_dir}/evaluation.json" with open(evaluation_path, "w") as f: f.write(json.dumps(report_dict)) logger.info("Model evaluation completed.")