# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the 'License'). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the 'license' file accompanying this file. This file is
# distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import os
from contextlib import ExitStack as does_not_raise
from tempfile import TemporaryDirectory

import numpy as np
import pandas as pd
import pytest
from sagemaker_algorithm_toolkit import exceptions as exc

from sagemaker_xgboost_container.prediction_utils import (
    PREDICTIONS_OUTPUT_FILE,
    ValidationPredictionRecorder,
)


@pytest.mark.parametrize(
    "config",
    [
        # binary classification happy path
        {
            "recorder_kwargs": {
                "y_true": np.array([1, 0, 1, 0]),
                "num_cv_round": 1,
                "classification": True,
            },
            "record_calls": [
                {"args": [np.array([0, 1, 2, 3]), np.array([1, 0, 1, 1])]},
            ],
            "expected_output": pd.DataFrame(
                {
                    0: [1.0, 0, 1.0, 0],
                    1: [1.0, 0, 1.0, 1.0],
                    2: [1.0, 0, 1.0, 1.0],
                }
            ),
        },
        # binary classification happy path, multiple folds
        {
            "recorder_kwargs": {
                "y_true": np.array([1, 0, 1, 0]),
                "num_cv_round": 1,
                "classification": True,
            },
            "record_calls": [
                {"args": [np.array([0, 1]), np.array([1, 0])]},
                {"args": [np.array([2, 3]), np.array([1, 1])]},
            ],
            "expected_output": pd.DataFrame(
                {
                    0: [1.0, 0, 1.0, 0],
                    1: [1.0, 0, 1.0, 1.0],
                    2: [1.0, 0, 1.0, 1.0],
                }
            ),
        },
        # binary classification happy path, multiple repeats
        {
            "recorder_kwargs": {
                "y_true": np.array([1, 0, 1, 0]),
                "num_cv_round": 3,
                "classification": True,
            },
            "record_calls": [
                {"args": [np.array([0, 1, 2, 3]), np.array([0.6, 0, 0.6, 0.6])]},
                {"args": [np.array([0, 1, 2, 3]), np.array([0.7, 0, 0.7, 0.4])]},
                {"args": [np.array([0, 1, 2, 3]), np.array([0.8, 0, 0.8, 0.2])]},
            ],
            "expected_output": pd.DataFrame(
                {
                    0: [1.0, 0, 1.0, 0],
                    1: [0.7, 0, 0.7, 0.4],
                    2: [1.0, 0, 1.0, 0.0],
                }
            ),
        },
        # binary classification happy path, multiple repeats, each repeat split across folds
        {
            "recorder_kwargs": {
                "y_true": np.array([1, 0, 1, 0]),
                "num_cv_round": 3,
                "classification": True,
            },
            "record_calls": [
                {"args": [np.array([0, 1, 2]), np.array([0.6, 0, 0.6])]},
                {"args": [np.array([3]), np.array([0.6])]},
                {"args": [np.array([0]), np.array([0.7])]},
                {"args": [np.array([1, 2]), np.array([0, 0.7])]},
                {"args": [np.array([3]), np.array([0.4])]},
                {"args": [np.array([0, 1, 2, 3]), np.array([0.8, 0, 0.8, 0.2])]},
            ],
            "expected_output": pd.DataFrame(
                {
                    0: [1.0, 0, 1.0, 0],
                    1: [0.7, 0, 0.7, 0.4],
                    2: [1.0, 0, 1.0, 0.0],
                }
            ),
        },
        # regression happy path, multiple repeats
        {
            "recorder_kwargs": {
                "y_true": np.array([0.8, 0, 1, 0]),
                "num_cv_round": 3,
                "classification": False,
            },
            "record_calls": [
                {"args": [np.array([0, 1, 2, 3]), np.array([0.6, 0, 0.6, 0.6])]},
                {"args": [np.array([0, 1, 2, 3]), np.array([0.7, 0, 0.7, 0.4])]},
                {"args": [np.array([0, 1, 2, 3]), np.array([0.8, 0, 0.8, 0.2])]},
            ],
            "expected_output": pd.DataFrame(
                {
                    0: [0.8, 0, 1.0, 0],
                    1: [0.7, 0, 0.7, 0.4],
                }
            ),
        },
        # multiclass classification happy path
        {
            "recorder_kwargs": {
                "y_true": np.array([1, 2, 1, 0]),
                "num_cv_round": 1,
                "classification": True,
            },
            "record_calls": [
                {
                    "args": [
                        np.array([0, 1, 2, 3]),
                        np.array([[0.1, 0.6, 0.3, 0.1], [0.1, 0.3, 0.4, 0.1], [0.8, 0.1, 0.3, 0.8]]).T,
                    ]
                },
            ],
            "expected_output": pd.DataFrame(
                {
                    0: [1.0, 2.0, 1.0, 0.0],
                    1: [0.8, 0.6, 0.4, 0.8],
                    2: [2.0, 0.0, 1.0, 2.0],
                }
            ),
        },
        # incorrect shape of predictions
        {
            "recorder_kwargs": {
                "y_true": np.array([0.8, 0, 1, 0]),
                "num_cv_round": 3,
                "classification": False,
            },
            "record_calls": [
                {"args": [np.array([0, 1, 2, 3]), np.array([0.6, 0, 0.6, 0.6])]},
                {
                    "args": [
                        np.array([0, 1, 2, 3]),
                        np.array([[0.1, 0.6, 0.3, 0.1], [0.1, 0.3, 0.4, 0.1], [0.8, 0.1, 0.3, 0.8]]).T,
                    ],
                    "record_raises": pytest.raises(exc.AlgorithmError),
                },
            ],
            "expected_output": None,
        },
        # incomplete predictions
        {
            "recorder_kwargs": {
                "y_true": np.array([0.8, 0, 1, 0]),
                "num_cv_round": 1,
                "classification": False,
            },
            "record_calls": [
                {"args": [np.array([0, 1, 2]), np.array([0.6, 0, 0.3])]},
            ],
            "save_raises": pytest.raises(exc.AlgorithmError),
            "expected_output": None,
        },
        # too many predictions
        {
            "recorder_kwargs": {
                "y_true": np.array([0.8, 0, 1, 0]),
                "num_cv_round": 1,
                "classification": False,
            },
            "record_calls": [
                {"args": [np.array([0, 1, 2]), np.array([0.6, 0, 0.3])]},
                {
                    "args": [np.array([0, 1, 2]), np.array([0.6, 0, 0.3])],
                    "record_raises": pytest.raises(exc.AlgorithmError),
                },
            ],
            "expected_output": None,
        },
    ],
)
def test_validation_prediction_recorder(config):
    """Replay one scenario against ``ValidationPredictionRecorder`` and check the saved file.

    Each ``config`` dict provides:

    * ``recorder_kwargs``: keyword arguments for the recorder constructor
      (``output_data_dir`` is supplied here as a fresh temporary directory).
    * ``record_calls``: ``record()`` invocations replayed in order. An entry
      carrying ``record_raises`` must raise inside that context manager, and
      the scenario ends right after it.
    * ``save_raises`` (optional): context manager that ``save()`` must raise
      inside; the scenario ends right after it.
    * ``expected_output``: frame the saved predictions file must equal when
      no failure is expected.
    """
    with TemporaryDirectory() as temp_folder:
        recorder = ValidationPredictionRecorder(output_data_dir=temp_folder, **config["recorder_kwargs"])

        for step in config["record_calls"]:
            expected_record_error = step.get("record_raises", None)
            record_guard = does_not_raise() if expected_record_error is None else expected_record_error
            with record_guard:
                recorder.record(*step["args"])
            if expected_record_error is not None:
                # The failing call was the point of the scenario; nothing is saved afterwards.
                return

        expected_save_error = config.get("save_raises", None)
        save_guard = does_not_raise() if expected_save_error is None else expected_save_error
        with save_guard:
            recorder.save()
        if expected_save_error is not None:
            return

        # The file is read without a header row, so columns are labelled 0..n-1
        # like the integer keys used in the expected frames.
        output_path = os.path.join(temp_folder, PREDICTIONS_OUTPUT_FILE)
        df = pd.read_csv(output_path, header=None)
        assert df.equals(config["expected_output"])