{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### In this notebook we will be taking the original South German Credit dataset and adding bias against foreign workers" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%%capture\n", "# Cell magic: hide the (noisy) pip output produced by this cell.\n", "\n", "# Synthetic data generation relies on SDV. The version is pinned below 1.0\n", "# because the `sdv.tabular` module imported in this cell was removed in SDV 1.0.\n", "# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n", "%pip install \"sdv<1.0\"\n", "\n", "# Imports\n", "import pandas as pd\n", "from sagemaker.s3 import S3Downloader\n", "from sdv.tabular import GaussianCopula" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Download data\n", "\n", "First, __download__ the data and save it in the `data` folder.\n", "\n", "\n", "$^{[2]}$ Ulrike Grömping\n", "Beuth University of Applied Sciences Berlin\n", "Website with contact information: https://prof.beuth-hochschule.de/groemping/." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | status | \n", "duration | \n", "credit_history | \n", "purpose | \n", "amount | \n", "savings | \n", "employment_duration | \n", "installment_rate | \n", "personal_status_sex | \n", "other_debtors | \n", "... | \n", "property | \n", "age | \n", "other_installment_plans | \n", "housing | \n", "number_credits | \n", "job | \n", "people_liable | \n", "telephone | \n", "foreign_worker | \n", "credit_risk | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "18 | \n", "4 | \n", "2 | \n", "1049 | \n", "1 | \n", "2 | \n", "4 | \n", "2 | \n", "1 | \n", "... | \n", "2 | \n", "21 | \n", "3 | \n", "1 | \n", "1 | \n", "3 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "
1 | \n", "1 | \n", "9 | \n", "4 | \n", "0 | \n", "2799 | \n", "1 | \n", "3 | \n", "2 | \n", "3 | \n", "1 | \n", "... | \n", "1 | \n", "36 | \n", "3 | \n", "1 | \n", "2 | \n", "3 | \n", "1 | \n", "1 | \n", "2 | \n", "1 | \n", "
2 | \n", "2 | \n", "12 | \n", "2 | \n", "9 | \n", "841 | \n", "2 | \n", "4 | \n", "2 | \n", "2 | \n", "1 | \n", "... | \n", "1 | \n", "23 | \n", "3 | \n", "1 | \n", "1 | \n", "2 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "
3 | \n", "1 | \n", "12 | \n", "4 | \n", "0 | \n", "2122 | \n", "1 | \n", "3 | \n", "3 | \n", "3 | \n", "1 | \n", "... | \n", "1 | \n", "39 | \n", "3 | \n", "1 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "
4 | \n", "1 | \n", "12 | \n", "4 | \n", "0 | \n", "2171 | \n", "1 | \n", "3 | \n", "4 | \n", "3 | \n", "1 | \n", "... | \n", "2 | \n", "38 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "1 | \n", "
5 rows × 21 columns
\n", "\n", " | status | \n", "duration | \n", "credit_history | \n", "purpose | \n", "amount | \n", "savings | \n", "employment_duration | \n", "installment_rate | \n", "personal_status_sex | \n", "other_debtors | \n", "... | \n", "property | \n", "age | \n", "other_installment_plans | \n", "housing | \n", "number_credits | \n", "job | \n", "people_liable | \n", "telephone | \n", "foreign_worker | \n", "credit_risk | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "3 | \n", "13 | \n", "3 | \n", "2 | \n", "3583 | \n", "3 | \n", "3 | \n", "3 | \n", "2 | \n", "1 | \n", "... | \n", "3 | \n", "55 | \n", "3 | \n", "2 | \n", "1 | \n", "3 | \n", "2 | \n", "2 | \n", "1 | \n", "0 | \n", "
1 | \n", "4 | \n", "10 | \n", "0 | \n", "2 | \n", "1148 | \n", "4 | \n", "3 | \n", "3 | \n", "3 | \n", "2 | \n", "... | \n", "2 | \n", "30 | \n", "3 | \n", "2 | \n", "1 | \n", "3 | \n", "2 | \n", "1 | \n", "1 | \n", "0 | \n", "
2 | \n", "2 | \n", "13 | \n", "1 | \n", "2 | \n", "2032 | \n", "2 | \n", "4 | \n", "2 | \n", "3 | \n", "3 | \n", "... | \n", "1 | \n", "22 | \n", "2 | \n", "1 | \n", "1 | \n", "3 | \n", "2 | \n", "1 | \n", "1 | \n", "0 | \n", "
3 | \n", "1 | \n", "24 | \n", "3 | \n", "4 | \n", "6541 | \n", "5 | \n", "2 | \n", "4 | \n", "2 | \n", "2 | \n", "... | \n", "3 | \n", "31 | \n", "2 | \n", "2 | \n", "1 | \n", "3 | \n", "2 | \n", "2 | \n", "1 | \n", "0 | \n", "
4 | \n", "4 | \n", "9 | \n", "4 | \n", "2 | \n", "2129 | \n", "2 | \n", "4 | \n", "2 | \n", "3 | \n", "1 | \n", "... | \n", "2 | \n", "38 | \n", "3 | \n", "2 | \n", "2 | \n", "3 | \n", "1 | \n", "2 | \n", "1 | \n", "0 | \n", "
5 rows × 21 columns
\n", "\n", " | status | \n", "duration | \n", "credit_history | \n", "purpose | \n", "amount | \n", "savings | \n", "employment_duration | \n", "installment_rate | \n", "personal_status_sex | \n", "other_debtors | \n", "... | \n", "property | \n", "age | \n", "other_installment_plans | \n", "housing | \n", "number_credits | \n", "job | \n", "people_liable | \n", "telephone | \n", "foreign_worker | \n", "credit_risk | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "18 | \n", "4 | \n", "2 | \n", "1049 | \n", "1 | \n", "2 | \n", "4 | \n", "2 | \n", "1 | \n", "... | \n", "2 | \n", "21 | \n", "3 | \n", "1 | \n", "1 | \n", "3 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "
1 | \n", "1 | \n", "9 | \n", "4 | \n", "0 | \n", "2799 | \n", "1 | \n", "3 | \n", "2 | \n", "3 | \n", "1 | \n", "... | \n", "1 | \n", "36 | \n", "3 | \n", "1 | \n", "2 | \n", "3 | \n", "1 | \n", "1 | \n", "2 | \n", "1 | \n", "
2 | \n", "2 | \n", "12 | \n", "2 | \n", "9 | \n", "841 | \n", "2 | \n", "4 | \n", "2 | \n", "2 | \n", "1 | \n", "... | \n", "1 | \n", "23 | \n", "3 | \n", "1 | \n", "1 | \n", "2 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "
3 | \n", "1 | \n", "12 | \n", "4 | \n", "0 | \n", "2122 | \n", "1 | \n", "3 | \n", "3 | \n", "3 | \n", "1 | \n", "... | \n", "1 | \n", "39 | \n", "3 | \n", "1 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "
4 | \n", "1 | \n", "12 | \n", "4 | \n", "0 | \n", "2171 | \n", "1 | \n", "3 | \n", "4 | \n", "3 | \n", "1 | \n", "... | \n", "2 | \n", "38 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "1 | \n", "
5 rows × 21 columns
\n", "