import subprocess import sys import json import logging import pathlib import tarfile import os import numpy as np import pandas as pd logger = logging.getLogger() logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) if __name__ == "__main__": logger.debug("Starting evaluation.") model_path = "/opt/ml/processing/model/model.tar.gz" with tarfile.open(model_path) as tar: def is_within_directory(directory, target): abs_directory = os.path.abspath(directory) abs_target = os.path.abspath(target) prefix = os.path.commonprefix([abs_directory, abs_target]) return prefix == abs_directory def safe_extract(tar, path=".", members=None, *, numeric_owner=False): for member in tar.getmembers(): member_path = os.path.join(path, member.name) if not is_within_directory(path, member_path): raise Exception("Attempted Path Traversal in Tar File") tar.extractall(path, members, numeric_owner=numeric_owner) safe_extract(tar, path="./hf_model") logger.debug(os.listdir("./hf_model")) with open("./hf_model/evaluation.json") as f: eval_result = json.load(f) logger.debug(eval_result) output_dir = "/opt/ml/processing/evaluation" pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) evaluation_path = f"{output_dir}/evaluation.json" with open(evaluation_path, "w") as f: f.write(json.dumps(eval_result))