aprender-core 0.31.2

// CONTRACT: calibration-v1.yaml
// HASH: sha256:e8f4a2b1c9d37065
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`

use aprender::calibration::{expected_calibration_error, maximum_calibration_error};
use proptest::prelude::*;

/// Strategy: generate a pair of (predictions, labels) where predictions
/// are f32 in [0, 1] and labels are bool, both with the same length n.
fn calibration_input_strategy() -> impl Strategy<Value = (Vec<f32>, Vec<bool>)> {
    (2usize..50).prop_flat_map(|n| {
        let preds = proptest::collection::vec(0.0f32..=1.0f32, n);
        let labels = proptest::collection::vec(proptest::bool::ANY, n);
        (preds, labels)
    })
}

proptest! {
    #![proptest_config(ProptestConfig::with_cases(256))]

    /// FALSIFY-CAL-001: ECE bounded in [0, 1]
    /// Formal: 0 <= ECE(p, y, 10) <= 1 for all p in [0,1]^n, y in {0,1}^n
    #[test]
    fn prop_ece_bounded(
        (predictions, labels) in calibration_input_strategy()
    ) {
        let ece = expected_calibration_error(&predictions, &labels, 10);
        prop_assert!(
            (0.0..=1.0).contains(&ece),
            "ECE={}, expected in [0, 1]", ece
        );
    }

    /// FALSIFY-CAL-002: MCE bounded in [0, 1]
    /// Formal: 0 <= MCE(p, y, 10) <= 1 for all p in [0,1]^n, y in {0,1}^n
    #[test]
    fn prop_mce_bounded(
        (predictions, labels) in calibration_input_strategy()
    ) {
        let mce = maximum_calibration_error(&predictions, &labels, 10);
        prop_assert!(
            (0.0..=1.0).contains(&mce),
            "MCE={}, expected in [0, 1]", mce
        );
    }

    /// FALSIFY-CAL-003: MCE >= ECE
    /// Formal: MCE(p, y, b) >= ECE(p, y, b) - epsilon for all p, y, b
    /// The maximum bin error is always at least as large as the weighted average.
    #[test]
    fn prop_mce_geq_ece(
        (predictions, labels) in calibration_input_strategy()
    ) {
        let ece = expected_calibration_error(&predictions, &labels, 10);
        let mce = maximum_calibration_error(&predictions, &labels, 10);
        let epsilon = 1e-6;
        prop_assert!(
            mce >= ece - epsilon,
            "MCE={} < ECE={} (violated MCE >= ECE)", mce, ece
        );
    }

    /// FALSIFY-CAL-004: Perfect calibration yields ~0 error.
    /// When all predictions equal 1.0 and all labels are true, every sample
    /// falls in the top bin with avg_conf=1.0 and avg_acc=1.0, so ECE=MCE=0.
    /// Similarly for predictions=0.0 with all-false labels.
    #[test]
    fn prop_perfect_calibration(
        n in 2usize..50,
        all_positive in proptest::bool::ANY,
    ) {
        let (pred_val, label_val) = if all_positive {
            (1.0_f32, true)
        } else {
            (0.0_f32, false)
        };
        let predictions = vec![pred_val; n];
        let labels = vec![label_val; n];

        let ece = expected_calibration_error(&predictions, &labels, 10);
        let mce = maximum_calibration_error(&predictions, &labels, 10);
        let epsilon = 1e-6;

        prop_assert!(
            ece.abs() < epsilon,
            "perfect calibration ECE={}, expected ~0", ece
        );
        prop_assert!(
            mce.abs() < epsilon,
            "perfect calibration MCE={}, expected ~0", mce
        );
    }

    /// FALSIFY-CAL-005: ECE non-negative
    /// Formal: ECE(p, y, 10) >= 0 for all p, y
    #[test]
    fn prop_ece_non_negative(
        (predictions, labels) in calibration_input_strategy()
    ) {
        let ece = expected_calibration_error(&predictions, &labels, 10);
        prop_assert!(
            ece >= 0.0,
            "ECE={}, expected >= 0", ece
        );
    }

    /// FALSIFY-CAL-006: n_bins doesn't break bounds
    /// Formal: for n_bins in 1..=20, ECE and MCE both in [0, 1]
    #[test]
    fn prop_nbins_invariant(
        (predictions, labels) in calibration_input_strategy(),
        n_bins in 1usize..=20,
    ) {
        let ece = expected_calibration_error(&predictions, &labels, n_bins);
        let mce = maximum_calibration_error(&predictions, &labels, n_bins);

        prop_assert!(
            (0.0..=1.0).contains(&ece),
            "ECE={} out of [0, 1] for n_bins={}", ece, n_bins
        );
        prop_assert!(
            (0.0..=1.0).contains(&mce),
            "MCE={} out of [0, 1] for n_bins={}", mce, n_bins
        );
    }
}