# COMPAS - Bias

This notebook computes the racial bias of the COMPAS decile scores using several different bias metrics.

import pandas as pd
from fairscoring.metrics import bias_metric_pe, bias_metric_eo, bias_metric_cal, \
    WassersteinMetric, CalibrationMetric
from fairscoring.metrics.roc import bias_metric_roc, bias_metric_xroc

from tqdm.notebook import tqdm

## Setting

### Load COMPAS data

# Location of the CSV file in ProPublica's compas-analysis repository on GitHub.
dataURL = 'https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv'

# Read the file and normalise every column name to lower case in one chained step.
df = pd.read_csv(dataURL).rename(columns=str.lower)

# Names of the columns that feed the bias analysis.
score_column = 'decile_score'
target_column = 'two_year_recid'
protected_attribute_column = 'race'

# Pull the three columns out of the data frame in a single step.
scores, target, attribute = (
    df[column]
    for column in (score_column, target_column, protected_attribute_column)
)

# The two groups of the protected attribute that are compared.
# Alternative: ['African-American', None], where None = all others.
groups = ['African-American', 'Caucasian']

# Value of the target column that is treated as the favorable outcome.
favorable_target = 0

### List of bias metrics

# Ready-made metric objects imported from fairscoring.
predefined_metrics = [
    bias_metric_eo,     # Standardized Equal Opportunity
    bias_metric_pe,     # Standardized Predictive Equality
    bias_metric_cal,    # Standardized Calibration Equality
    bias_metric_roc,    # ROC-Bias
    bias_metric_xroc,   # xROC-Bias
]

# Metrics configured in this notebook; each is created with
# score_transform="rescale" and carries a "(U)" suffix in its name.
custom_metrics = [
    WassersteinMetric(
        fairness_type="EO",
        name="Equal Opportunity (U)",
        score_transform="rescale",
    ),
    WassersteinMetric(
        fairness_type="PE",
        name="Predictive Equality (U)",
        score_transform="rescale",
    ),
    CalibrationMetric(
        weighting="scores",
        name="Calibration (U)",
        score_transform="rescale",
    ),
]

# Complete list, in the order in which the metrics are evaluated and reported.
metrics = predefined_metrics + custom_metrics

## Bias Measures

### Compute Bias Table

Compute every bias metric from the list above on the COMPAS dataset.

# Keyword arguments shared by every metric evaluation.
bias_kwargs = dict(
    groups=groups,
    favorable_target=favorable_target,
    # Score range handed to the metrics; presumably the decile range 1..10 -- TODO confirm.
    min_score=1, max_score=10,
    # Fixed seed so that repeated runs use the same random state.
    n_permute=1000, seed=2579,
    prefer_high_scores=False,
)

# Evaluate each metric in turn and keep the (metric, bias) pairs for the result table.
results = []
for metric in tqdm(metrics):
    bias = metric.bias(scores, target, attribute, **bias_kwargs)
    results.append((metric, bias))
C:\dev\fair-scoring-public\src\fairscoring\metrics\calibration.py:81: RuntimeWarning: invalid value encountered in divide
  fraction_of_positives = np.where(nonzero, bin_true / bin_total, np.nan)
C:\dev\fair-scoring-public\src\fairscoring\metrics\calibration.py:82: RuntimeWarning: invalid value encountered in divide
  mean_predicted_value = np.where(nonzero, bin_sums / bin_total, np.nan)
C:\dev\fair-scoring-public\src\fairscoring\metrics\calibration.py:81: RuntimeWarning: invalid value encountered in divide
  fraction_of_positives = np.where(nonzero, bin_true / bin_total, np.nan)
C:\dev\fair-scoring-public\src\fairscoring\metrics\calibration.py:82: RuntimeWarning: invalid value encountered in divide
  mean_predicted_value = np.where(nonzero, bin_sums / bin_total, np.nan)

### Result Table

The table below corresponds to Table 1 and Table C1 in the publication.

# Format each (metric, bias) pair into a row of display strings.
# The rows get their own name so that `results` keeps the raw pairs; rebinding
# `results` here would make this cell fail when it is run a second time.
result_rows = [
    [
        metric.name,
        f"{bias.bias:.3f}",
        f"{bias.pos_component:.0%}",
        f"{bias.neg_component:.0%}",
        f"{bias.p_value:.2f}",
    ]
    for metric, bias in results
]

# Use a dedicated name instead of overwriting `df`, which holds the COMPAS data
# that the earlier cells read their columns from.
bias_table = pd.DataFrame(
    result_rows,
    columns=["metric", "total", "pos", "neg", "p-value"],
).set_index("metric")

# Last expression of the cell: rendered as the result table.
bias_table
total pos neg p-value
metric
Equal Opportunity 0.161 0% 100% 0.00
Predictive Equality 0.154 0% 100% 0.00
Calibration 0.034 79% 21% 0.30
ROC bias 0.016 46% 54% 0.31
xROC bias 0.273 0% 100% 0.00
Equal Opportunity (U) 0.152 0% 100% 0.00
Predictive Equality (U) 0.163 0% 100% 0.00
Calibration (U) 0.037 78% 22% 0.23