#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Integration test: compute SMClarify pre/post-training bias metrics on the
German credit (statlog) dataset and compare them against precomputed expected
results stored in S3."""
import copy
import os
import logging
import sys
import json
import boto3
from typing import Dict, Optional
import pandas as pd
from smclarify.bias.report import FacetColumn, LabelColumn, bias_report, StageType
from smclarify.util.dataset import Datasets

# Install and import pytest at runtime (the test image may not ship it).
# check_call (instead of call) makes a failed install raise here, loudly,
# rather than surfacing later as a confusing ImportError on the next line.
import subprocess

subprocess.check_call([sys.executable, "-m", "pip", "install", "pytest"])
import pytest

logger = logging.getLogger(__name__)


def approximate(expected, rtol=None, atol=None):
    """
    An enhancement of pytest.approx to support complex composite objects like
    a nested dict (see [1]).

    [1] https://github.com/pytest-dev/pytest/issues/3164

    :param expected: the input, it is supposed to be a complex composite
        object like a nested dict or list.
    :param rtol: relative tolerance
    :param atol: absolute tolerance
    :return: A copy of the input but with all floating numbers wrapped by
        pytest.approx.
    """
    # Deep-copy first so the caller's object is never mutated.
    expected_copy = copy.deepcopy(expected)
    __approximate(expected_copy, rtol, atol)
    return expected_copy


def __approximate(data, rtol=None, atol=None):
    """Recursively wrap, in place, every float inside a nested dict/list
    structure with pytest.approx(rel=rtol, abs=atol).

    Falsy non-float leaves (None, 0, "", empty containers) are left untouched
    and logged; other non-container truthy leaves are no-ops in the recursion.
    """
    if isinstance(data, dict):
        for key, value in data.items():
            if isinstance(value, float):
                data[key] = pytest.approx(value, rel=rtol, abs=atol)
            elif value:
                __approximate(value, rtol, atol)
            else:
                # Bug fix: the original indexed data['name'], which raises
                # KeyError whenever the dict has no "name" entry; fall back
                # to the current key so the log line is always safe.
                logger.info(f"Cannot convert null value for {data.get('name', key)}")
    elif isinstance(data, list):
        for i, value in enumerate(data):
            if isinstance(value, float):
                data[i] = pytest.approx(value, rel=rtol, abs=atol)
            elif value:
                __approximate(value, rtol, atol)
            else:
                logger.info(f"Cannot convert null value for index {data}[{i}]")


def fetch_input_data() -> pd.DataFrame:
    """Download the German credit CSV dataset from S3 and return it as a
    DataFrame."""
    dataset = Datasets()
    s3_input_obj = dataset("german_csv")
    return s3_input_obj.read_csv_data()


def get_expected_results() -> Dict:
    """Download the precomputed expected bias-metric results JSON from S3
    (into this test's directory) and return it as a dict."""
    s3_client = boto3.client("s3")
    test_dir = os.path.dirname(os.path.abspath(__file__))
    file_name = "bias_metrics_results.json"
    s3_client.download_file(
        "sagemaker-clarify-datasets", f"statlog/result/{file_name}", f"{test_dir}/{file_name}"
    )
    results_file = os.path.join(test_dir, file_name)
    with open(results_file) as json_file:
        expected_results = json.load(json_file)
    return expected_results


def get_predicted_labels() -> pd.DataFrame:
    """Download the predicted labels for the German dataset from S3 and
    return them as a Series (single-column DataFrame squeezed)."""
    dataset = Datasets()
    s3_pred_label_obj = dataset("german_predicted_labels")
    predicted_labels = s3_pred_label_obj.read_csv_data(index_col=0)
    return predicted_labels.squeeze()


def get_pretraining_bias_metrics(
    dataframe: pd.DataFrame,
    facet_column: FacetColumn,
    label_column: LabelColumn,
    group_variable: Optional[pd.Series],
) -> Dict:
    """Compute all pre-training bias metrics for the given facet/label."""
    return bias_report(
        dataframe,
        facet_column,
        label_column,
        stage_type=StageType.PRE_TRAINING,
        metrics=["all"],
        group_variable=group_variable,
    )


def get_posttraining_bias_metrics(
    dataframe: pd.DataFrame,
    facet_column: FacetColumn,
    label_column: LabelColumn,
    pred_label_column: LabelColumn,
    group_variable: Optional[pd.Series],
) -> Dict:
    """Compute all post-training bias metrics for the given facet/label using
    the supplied predicted-label column."""
    report = bias_report(
        dataframe,
        facet_column,
        label_column,
        stage_type=StageType.POST_TRAINING,
        predicted_label_column=pred_label_column,
        metrics=["all"],
        group_variable=group_variable,
    )
    return report


def test_bias_metrics():
    """End-to-end check: computed bias metrics must match (approximately)
    the expected results downloaded from S3.

    :raises AssertionError: if either the pre- or post-training metrics
        differ from the expected values.
    """
    dataframe = fetch_input_data()
    # pop() removes the label from the feature frame while keeping its data.
    label_data = dataframe.pop("Class1Good2Bad")
    label_column = LabelColumn("Class1Good2Bad", label_data, [1])
    facet_column = FacetColumn("ForeignWorker", [1])
    group_variable = dataframe["A151"]

    # pre-training bias metrics
    pre_training_metrics = get_pretraining_bias_metrics(
        dataframe, facet_column, label_column, group_variable
    )

    # post-training bias metrics
    predicted_labels = get_predicted_labels()
    pred_label_column = LabelColumn("_predicted_labels", predicted_labels, [1])
    post_training_metrics = get_posttraining_bias_metrics(
        dataframe, facet_column, label_column, pred_label_column, group_variable
    )

    expected_results = get_expected_results()
    pre_training_expected_result = expected_results.get("pre_training_bias_metrics")
    post_training_expected_result = expected_results.get("post_training_bias_metrics")

    # Explicit raise (not a bare assert) so the check survives `python -O`.
    if not (pre_training_metrics == approximate(pre_training_expected_result)):
        raise AssertionError("Pre_training Bias Metrics values differ from expected Metrics")
    if not (post_training_metrics == approximate(post_training_expected_result)):
        raise AssertionError("Post_training Bias Metrics values differ from expected Metrics")
    print("Test SMClarify Bias Metrics succeeded!")


if __name__ == "__main__":
    try:
        # test_bias_metrics() returns None, so this exits with status 0 on
        # success; any AssertionError propagates as a non-zero exit.
        sys.exit(test_bias_metrics())
    except KeyboardInterrupt:
        pass