{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# SMclarify Bias metrics example usage" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# pandas is imported explicitly because the cells below call pd.read_csv;\n", "# do not rely on the wildcard import to expose `pd`.\n", "import pandas as pd\n", "\n", "from smclarify.bias.report import *\n", "from smclarify.util.dataset import Datasets, german_lending_readable_values\n", "from typing import Dict\n", "from collections import defaultdict" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load the [German lending dataset](https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)) and convert its values to the human-readable strings from the dataset description. The transformed dataset is functionally equivalent to the original.\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": "0 1\n1 0\n2 1\n3 1\n4 0\n ..\n995 1\n996 1\n997 1\n998 0\n999 1\nName: target, Length: 1000, dtype: category\nCategories (2, int64): [0, 1]" }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fetch the raw, header-less, space-separated file from the UCI repository.\n", "# To work offline, point read_csv at a local copy instead (e.g. '../german.data').\n", "df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data', header=None, sep=' ')\n", "df.columns = [\"CheckingAC_Status\",\"MaturityMonths\",\"CreditHistory\",\"Purpose\",\"LoanAmount\",\"SavingsAC\",\n", "              \"Employment\",\"InstalmentPctOfIncome\",\"SexAndStatus\",\"OtherDebts\",\"PresentResidenceYears\",\n", "              \"Property\",\"Age\",\"OtherInstalmentPlans\",\"Housing\",\"NumExistingLoans\",\"Job\",\n", "              \"Dependents\",\"Telephone\",\"ForeignWorker\",\"Class1Good2Bad\"]\n", "df = german_lending_readable_values(df)\n", "# `target` is not one of the raw column names above; it is provided by\n", "# german_lending_readable_values. Display it as the cell result.\n", "df['target']" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": " 
CheckingAC_Status MaturityMonths CreditHistory \\\n0 x < 0 DM 6 critical accnt. / other credits \n1 0 <= x < 200 DM 48 existing credits paid \n2 no checking account 12 critical accnt. / other credits \n3 x < 0 DM 42 existing credits paid \n4 x < 0 DM 24 delay \n\n Purpose LoanAmount SavingsAC Employment InstalmentPctOfIncome \\\n0 radio/tv 1169 unknown x >= 7 years 4 \n1 radio/tv 5951 x < 100 DM 1 <= x < 4 years 2 \n2 education 2096 x < 100 DM 4 <= x < 7 years 2 \n3 forniture 7882 x < 100 DM 4 <= x < 7 years 2 \n4 new car 4870 x < 100 DM 1 <= x < 4 years 3 \n\n SexAndStatus OtherDebts ... \\\n0 male single none ... \n1 female divorced/separated/married none ... \n2 male single none ... \n3 male single guarantor ... \n4 male single none ... \n\n Property Age OtherInstalmentPlans Housing \\\n0 real estate 67 none own \n1 real estate 22 none own \n2 real estate 49 none own \n3 soc. savings / life insurance 45 none for free \n4 unknown 53 none for free \n\n NumExistingLoans Job Dependents Telephone \\\n0 2 skilled employee / official 1 yes \n1 1 skilled employee / official 1 none \n2 1 unskilled-resident 2 none \n3 1 skilled employee / official 2 none \n4 2 skilled employee / official 2 none \n\n ForeignWorker target \n0 yes 1 \n1 yes 0 \n2 yes 1 \n3 yes 1 \n4 yes 0 \n\n[5 rows x 21 columns]", "text/html": "
\n | CheckingAC_Status | \nMaturityMonths | \nCreditHistory | \nPurpose | \nLoanAmount | \nSavingsAC | \nEmployment | \nInstalmentPctOfIncome | \nSexAndStatus | \nOtherDebts | \n... | \nProperty | \nAge | \nOtherInstalmentPlans | \nHousing | \nNumExistingLoans | \nJob | \nDependents | \nTelephone | \nForeignWorker | \ntarget | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \nx < 0 DM | \n6 | \ncritical accnt. / other credits | \nradio/tv | \n1169 | \nunknown | \nx >= 7 years | \n4 | \nmale single | \nnone | \n... | \nreal estate | \n67 | \nnone | \nown | \n2 | \nskilled employee / official | \n1 | \nyes | \nyes | \n1 | \n
1 | \n0 <= x < 200 DM | \n48 | \nexisting credits paid | \nradio/tv | \n5951 | \nx < 100 DM | \n1 <= x < 4 years | \n2 | \nfemale divorced/separated/married | \nnone | \n... | \nreal estate | \n22 | \nnone | \nown | \n1 | \nskilled employee / official | \n1 | \nnone | \nyes | \n0 | \n
2 | \nno checking account | \n12 | \ncritical accnt. / other credits | \neducation | \n2096 | \nx < 100 DM | \n4 <= x < 7 years | \n2 | \nmale single | \nnone | \n... | \nreal estate | \n49 | \nnone | \nown | \n1 | \nunskilled-resident | \n2 | \nnone | \nyes | \n1 | \n
3 | \nx < 0 DM | \n42 | \nexisting credits paid | \nforniture | \n7882 | \nx < 100 DM | \n4 <= x < 7 years | \n2 | \nmale single | \nguarantor | \n... | \nsoc. savings / life insurance | \n45 | \nnone | \nfor free | \n1 | \nskilled employee / official | \n2 | \nnone | \nyes | \n1 | \n
4 | \nx < 0 DM | \n24 | \ndelay | \nnew car | \n4870 | \nx < 100 DM | \n1 <= x < 4 years | \n3 | \nmale single | \nnone | \n... | \nunknown | \n53 | \nnone | \nfor free | \n2 | \nskilled employee / official | \n2 | \nnone | \nyes | \n0 | \n
5 rows × 21 columns
\n