aprender-core 0.31.2

// CONTRACT: metrics-classification-v1.yaml
// HASH: sha256:a1b2c3d4e5f60789
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`

use aprender::metrics::classification::{
    accuracy, confusion_matrix, f1_score, precision, recall, Average,
};
use proptest::prelude::*;

/// Strategy: generate a pair of label vectors (y_pred, y_true) with
/// `n` elements in `0..n_classes`. Both vectors have the same length
/// and at least 1 element. Classes are in 0..n_classes.
fn label_pair_strategy() -> impl Strategy<Value = (Vec<usize>, Vec<usize>)> {
    (1usize..50, 2usize..6).prop_flat_map(|(n, n_classes)| {
        let pred = proptest::collection::vec(0..n_classes, n);
        let truth = proptest::collection::vec(0..n_classes, n);
        (pred, truth)
    })
}

/// Strategy: generate a label vector where every class in 0..n_classes
/// appears at least once. Used for perfect-classification tests where
/// Macro averaging requires all classes to have support > 0.
fn dense_label_strategy() -> impl Strategy<Value = Vec<usize>> {
    (2usize..6)
        .prop_flat_map(|n_classes| {
            // Start with one of each class, then add random extras
            let extras = proptest::collection::vec(0..n_classes, 0..20);
            (Just(n_classes), extras)
        })
        .prop_map(|(n_classes, extras)| {
            let mut labels: Vec<usize> = (0..n_classes).collect();
            labels.extend(extras);
            labels
        })
        .prop_shuffle()
}

proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    /// FALSIFY-CM-001: Accuracy bounded (accuracy ∈ [0, 1])
    /// Formal: 0 ≤ accuracy(ŷ, y) ≤ 1 for all ŷ, y
    #[test]
    fn prop_accuracy_bounded(
        (y_pred, y_true) in label_pair_strategy()
    ) {
        let acc = accuracy(&y_pred, &y_true);
        prop_assert!(
            (0.0..=1.0).contains(&acc),
            "accuracy={}, expected in [0, 1]", acc
        );
    }

    /// FALSIFY-CM-002: Precision bounded (precision ∈ [0, 1])
    /// Formal: 0 ≤ precision(ŷ, y, avg) ≤ 1 for all ŷ, y, avg ∈ {Micro, Macro, Weighted}
    #[test]
    fn prop_precision_bounded(
        (y_pred, y_true) in label_pair_strategy(),
        avg in prop_oneof![
            Just(Average::Micro),
            Just(Average::Macro),
            Just(Average::Weighted),
        ]
    ) {
        let prec = precision(&y_pred, &y_true, avg);
        prop_assert!(
            (0.0..=1.0).contains(&prec),
            "precision={}, expected in [0, 1]", prec
        );

        // Also verify recall is bounded (same contract)
        let rec = recall(&y_pred, &y_true, avg);
        prop_assert!(
            (0.0..=1.0).contains(&rec),
            "recall={}, expected in [0, 1]", rec
        );
    }

    /// FALSIFY-CM-003: F1 harmonic mean (F1 ≤ max(precision, recall))
    /// Formal: F1(ŷ, y, avg) ≤ max(precision(ŷ, y, avg), recall(ŷ, y, avg))
    /// The harmonic mean of two numbers never exceeds the arithmetic mean,
    /// and the arithmetic mean never exceeds the maximum.
    #[test]
    fn prop_f1_harmonic_mean_bound(
        (y_pred, y_true) in label_pair_strategy(),
        avg in prop_oneof![
            Just(Average::Micro),
            Just(Average::Macro),
            Just(Average::Weighted),
        ]
    ) {
        let prec = precision(&y_pred, &y_true, avg);
        let rec = recall(&y_pred, &y_true, avg);
        let f1 = f1_score(&y_pred, &y_true, avg);

        let max_pr = prec.max(rec);

        // F1 is bounded in [0, 1]
        prop_assert!(
            (0.0..=1.0).contains(&f1),
            "f1={}, expected in [0, 1]", f1
        );

        // Harmonic mean ≤ max of its inputs (with epsilon for float rounding)
        prop_assert!(
            f1 <= max_pr + 1e-6,
            "F1={} > max(precision={}, recall={})={}", f1, prec, rec, max_pr
        );
    }

    /// FALSIFY-CM-004: Confusion matrix conservation (sum(CM) = n)
    /// Formal: Σᵢⱼ CM[i,j] = |y| for all ŷ, y
    /// Every sample is accounted for exactly once in the confusion matrix.
    #[test]
    fn prop_confusion_matrix_conservation(
        (y_pred, y_true) in label_pair_strategy()
    ) {
        let n = y_true.len();
        let cm = confusion_matrix(&y_pred, &y_true);
        let (rows, cols) = cm.shape();

        // Matrix is square
        prop_assert_eq!(rows, cols);

        // Sum of all elements equals number of samples
        let mut total = 0usize;
        for i in 0..rows {
            for j in 0..cols {
                total += cm.get(i, j);
            }
        }
        prop_assert!(
            total == n,
            "sum(CM)={} != n={}", total, n
        );
    }

    /// FALSIFY-CM-005: Perfect classification (accuracy = precision = recall = F1 = 1 when ŷ = y)
    /// Formal: ŷ = y ⟹ accuracy = precision = recall = F1 = 1.0
    ///
    /// Uses dense labels (every class 0..k represented) so Macro averaging
    /// has non-zero support for every class.
    #[test]
    fn prop_perfect_classification(
        y in dense_label_strategy()
    ) {
        let acc = accuracy(&y, &y);
        prop_assert!(
            (acc - 1.0).abs() < 1e-6,
            "perfect accuracy={}, expected 1.0", acc
        );

        for avg in &[Average::Micro, Average::Macro, Average::Weighted] {
            let prec = precision(&y, &y, *avg);
            let rec = recall(&y, &y, *avg);
            let f1 = f1_score(&y, &y, *avg);

            prop_assert!(
                (prec - 1.0).abs() < 1e-6,
                "perfect precision={}, expected 1.0 (avg={:?})", prec, avg
            );
            prop_assert!(
                (rec - 1.0).abs() < 1e-6,
                "perfect recall={}, expected 1.0 (avg={:?})", rec, avg
            );
            prop_assert!(
                (f1 - 1.0).abs() < 1e-6,
                "perfect F1={}, expected 1.0 (avg={:?})", f1, avg
            );
        }

        // Perfect confusion matrix: only diagonal elements are non-zero
        let cm = confusion_matrix(&y, &y);
        let (rows, cols) = cm.shape();
        for i in 0..rows {
            for j in 0..cols {
                let val = cm.get(i, j);
                if i == j {
                    // Diagonal: count of class i samples (must be >0 for dense labels)
                    prop_assert!(
                        val > 0,
                        "diagonal CM[{},{}]={}, expected >0 for dense labels", i, j, val
                    );
                } else {
                    prop_assert!(
                        val == 0,
                        "off-diagonal CM[{},{}]={}, expected 0 for perfect classification", i, j, val
                    );
                }
            }
        }
    }

    /// FALSIFY-CM-006: Micro-average identity (micro_precision = micro_recall = accuracy)
    /// Formal: precision(ŷ, y, Micro) = recall(ŷ, y, Micro) = accuracy(ŷ, y)
    /// In multi-class settings, micro-averaged precision and recall both equal accuracy.
    #[test]
    fn prop_micro_average_identity(
        (y_pred, y_true) in label_pair_strategy()
    ) {
        let acc = accuracy(&y_pred, &y_true);
        let micro_prec = precision(&y_pred, &y_true, Average::Micro);
        let micro_rec = recall(&y_pred, &y_true, Average::Micro);

        prop_assert!(
            (micro_prec - acc).abs() < 1e-6,
            "micro_precision={} != accuracy={}", micro_prec, acc
        );
        prop_assert!(
            (micro_rec - acc).abs() < 1e-6,
            "micro_recall={} != accuracy={}", micro_rec, acc
        );

        // Corollary: micro F1 also equals accuracy when micro_prec = micro_rec
        let micro_f1 = f1_score(&y_pred, &y_true, Average::Micro);
        prop_assert!(
            (micro_f1 - acc).abs() < 1e-6,
            "micro_f1={} != accuracy={}", micro_f1, acc
        );
    }
}