From 7407ee7b48bc68537e03dba14158602e5f7218e9 Mon Sep 17 00:00:00 2001
From: Dhmhtrios Pakakis
Date: Fri, 28 Nov 2025 21:36:22 +0200
Subject: [PATCH] DOC Add example: Effect of resampling on probability calibration

---
 .../plot_calibration_resampling.py | 158 ++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 examples/model_selection/plot_calibration_resampling.py

diff --git a/examples/model_selection/plot_calibration_resampling.py b/examples/model_selection/plot_calibration_resampling.py
new file mode 100644
index 000000000..c0c3977d4
--- /dev/null
+++ b/examples/model_selection/plot_calibration_resampling.py
@@ -0,0 +1,158 @@
"""
================================================================
Effect of Resampling on Probability Calibration for Classifiers
================================================================

This example illustrates how resampling a dataset (e.g. under-sampling) can affect
the calibration of a classifier's predicted probabilities, and how this issue can be
fixed using :class:`~sklearn.calibration.CalibratedClassifierCV`.

When we resample a dataset to balance it, we change the prior probabilities of the
classes it contains. The model then learns that some classes are more frequent, and
others less frequent, than they actually are.

This example shows:

1. The calibration curve of a model trained on the original dataset.
2. The calibration curve of a model trained on resampled data, which is distorted.
3. How to recover well-calibrated probabilities using calibration.
"""

# Authors: The imbalanced-learn developers
# License: MIT

# %%
# We create an imbalanced dataset with two classes and a 95:5 class ratio using
# :func:`sklearn.datasets.make_classification`, and split it into training and
# testing sets (80:20) with :func:`sklearn.model_selection.train_test_split`.
# We pass ``stratify=y`` to preserve the 95:5 class ratio in both splits.
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_classes=2,
    weights=[0.95, 0.05],
    random_state=42,
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# %%
# The Problem: Resampling distorts probabilities
# -----------------------------------------------
# First, we train a :class:`~sklearn.linear_model.LogisticRegression` classifier on
# the original data. Then, we train a second
# :class:`~sklearn.linear_model.LogisticRegression` classifier on data that has been
# under-sampled to a 50:50 ratio using
# :class:`~imblearn.under_sampling.RandomUnderSampler`.

from sklearn.linear_model import LogisticRegression

from imblearn.under_sampling import RandomUnderSampler

# Train a logistic regression model on the original (imbalanced) data
lr_original = LogisticRegression(random_state=42)
lr_original.fit(X_train, y_train)

# Train a second model on under-sampled (balanced) data
under_sampler = RandomUnderSampler(random_state=42)
X_undersampled, y_undersampled = under_sampler.fit_resample(X_train, y_train)

lr_undersampled = LogisticRegression(random_state=42)
lr_undersampled.fit(X_undersampled, y_undersampled)

# %%
# We plot the calibration curves of the two models using
# :class:`~sklearn.calibration.CalibrationDisplay`. The diagonal line represents a
# perfectly calibrated model.
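# Each point on a curve is obtained by binning the predicted probabilities and
# comparing the mean predicted probability in each bin with the observed fraction
# of positive samples in that bin.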

from sklearn.calibration import CalibrationDisplay

fig, ax = plt.subplots(figsize=(8, 6))

CalibrationDisplay.from_estimator(
    lr_original, X_test, y_test, n_bins=10, name="Original model", ax=ax
)

CalibrationDisplay.from_estimator(
    lr_undersampled, X_test, y_test, n_bins=10, name="Undersampled model", ax=ax
)

plt.title("Calibration: Original vs Resampled")
plt.show()

# %%
# **Observation:** the resampled model's curve lies well below the diagonal. The model
# is over-confident: it predicts high probabilities for the positive class, while the
# actual fraction of positives is much lower.

# %%
# The Solution: Probability Calibration
# --------------------------------------
# We use :class:`~sklearn.calibration.CalibratedClassifierCV` to calibrate the model.
# It is important to note that the calibrator needs to be trained on data with the
# real class distribution. Therefore, we split the training set into two parts:
#
# - ``X_model_train``: used to train the model, after resampling;
# - ``X_calib``: used to train the calibrator (original distribution).

from sklearn.calibration import CalibratedClassifierCV

# Split the training set
X_model_train, X_calib, y_model_train, y_calib = train_test_split(
    X_train, y_train, test_size=0.15, random_state=42, stratify=y_train
)

# Resample and train the model
X_undersampled2, y_undersampled2 = under_sampler.fit_resample(
    X_model_train, y_model_train
)
lr_resampled = LogisticRegression(random_state=42)
lr_resampled.fit(X_undersampled2, y_undersampled2)

# Calibrate using the held-out set (X_calib, y_calib).
# We use method="sigmoid", which works well for logistic regression and when the
# calibration set contains few positive samples.
# We use cv="prefit" because the base model is already trained.
calibrated_model = CalibratedClassifierCV(lr_resampled, method="sigmoid", cv="prefit")
calibrated_model.fit(X_calib, y_calib)

# %%
# Comparing the Results
# ---------------------
# We plot the calibration curve of the calibrated model next to that of the
# uncalibrated, resampled model.

fig, ax = plt.subplots(figsize=(8, 6))

# Plot the undersampled, uncalibrated model
CalibrationDisplay.from_estimator(
    lr_resampled, X_test, y_test, n_bins=10, name="Uncalibrated (Undersampled)", ax=ax
)

# Plot the new calibrated model
CalibrationDisplay.from_estimator(
    calibrated_model, X_test, y_test, n_bins=10, name="Calibrated Model", ax=ax
)

plt.title("Effect of Calibration on Resampled Model")
plt.show()

# %%
# We can also verify that calibration did not affect the model's ranking quality: the
# ROC AUC (discrimination power) should remain essentially unchanged. In addition, the
# Brier score shows the improvement in probability accuracy. We use
# :func:`sklearn.metrics.roc_auc_score` and :func:`sklearn.metrics.brier_score_loss`.

from sklearn.metrics import brier_score_loss, roc_auc_score

# Probability estimates on the test set for class 1 (the minority class)
prob_uncalibrated = lr_resampled.predict_proba(X_test)[:, 1]
prob_calibrated = calibrated_model.predict_proba(X_test)[:, 1]

print(f"ROC AUC (Uncalibrated): {roc_auc_score(y_test, prob_uncalibrated):.4f}")
print(f"ROC AUC (Calibrated): {roc_auc_score(y_test, prob_calibrated):.4f}")
print("-" * 30)
print("For the Brier score, smaller is better")
print(f"Brier Score (Uncalibrated): {brier_score_loss(y_test, prob_uncalibrated):.4f}")
print(f"Brier Score (Calibrated): {brier_score_loss(y_test, prob_calibrated):.4f}")
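
# %%
# As an optional extra check (a small illustrative addition, not required for the
# comparison above), we can compare each model's mean predicted probability with the
# empirical positive rate in the test set. A well-calibrated model's average predicted
# probability should be close to the true prevalence (about 5%), while the
# under-sampled, uncalibrated model's average is much higher, reflecting the balanced
# data it was trained on.

print(f"Positive rate in the test set: {y_test.mean():.4f}")
print(f"Mean predicted probability (Uncalibrated): {prob_uncalibrated.mean():.4f}")
print(f"Mean predicted probability (Calibrated): {prob_calibrated.mean():.4f}")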