aprender-core 0.29.1

Next-generation machine learning library in pure Rust
//! FALSIFY contract tests for drift detection
//!
//! Tests fundamental statistical invariants of drift detection:
//! - Identical distributions produce NoDrift
//! - Large mean shift triggers Warning or Drift
//! - Drift scores are non-negative when present
//! - Below min_samples returns NoDrift (insufficient data guard)

// CONTRACT: drift-v1.yaml
// HASH: sha256:d8e9f0a1b2c34567
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`

use aprender::metrics::drift::{DriftConfig, DriftDetector, DriftStatus};
use aprender::primitives::Vector;
use proptest::prelude::*;

/// Strategy: generate reference data with 50+ elements.
/// Values are in a bounded range to ensure meaningful standard deviations.
fn reference_data_strategy() -> impl Strategy<Value = Vec<f32>> {
    proptest::collection::vec(-5.0f32..5.0f32, 50..=100)
}

proptest! {
    #![proptest_config(ProptestConfig::with_cases(256))]

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DRIFT-001: Identical distributions → NoDrift
    // Formal: detect_univariate(X, X) == NoDrift
    // ──────────────────────────────────────────────────────────
    /// Obligation: When reference and current data are identical,
    /// the detector must report NoDrift. The normalized distance
    /// |μ_ref - μ_cur| / σ_ref = 0, which is below any threshold.
    #[test]
    fn prop_identical_distributions_no_drift(
        data in reference_data_strategy(),
    ) {
        let reference = Vector::from_slice(&data);
        let current = Vector::from_slice(&data);
        let detector = DriftDetector::new(DriftConfig::default());

        let status = detector.detect_univariate(&reference, &current);

        prop_assert!(
            matches!(status, DriftStatus::NoDrift),
            "FALSIFY-DRIFT-001: identical data produced {:?}, expected NoDrift",
            status
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DRIFT-002: Shifted distribution → Warning or Drift
    // Formal: detect_univariate(X, X + offset) ∈ {Warning, Drift} for large offset
    // ──────────────────────────────────────────────────────────
    /// Obligation: When current data is shifted by a large offset (10.0+),
    /// the normalized distance |offset| / σ_ref >> drift_threshold (0.2),
    /// so the detector must report Warning or Drift.
    #[test]
    fn prop_shifted_distribution_detects_drift(
        data in reference_data_strategy(),
        offset in 10.0f32..50.0f32,
    ) {
        let reference = Vector::from_slice(&data);

        // Shift all current values by a large offset
        let shifted: Vec<f32> = data.iter().map(|&v| v + offset).collect();
        let current = Vector::from_slice(&shifted);

        let detector = DriftDetector::new(DriftConfig::default());
        let status = detector.detect_univariate(&reference, &current);

        prop_assert!(
            matches!(status, DriftStatus::Warning { .. } | DriftStatus::Drift { .. }),
            "FALSIFY-DRIFT-002: shifted by {} produced {:?}, expected Warning or Drift",
            offset, status
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DRIFT-003: Drift score is non-negative when present
    // Formal: ∀ status with score, score >= 0
    // ──────────────────────────────────────────────────────────
    /// Obligation: Whenever drift detection returns a score (Warning or Drift),
    /// that score must be non-negative. The score is |μ_ref - μ_cur| / σ_ref,
    /// which is always >= 0 by construction.
    #[test]
    fn prop_drift_score_non_negative(
        data in reference_data_strategy(),
        shift in -20.0f32..20.0f32,
    ) {
        let reference = Vector::from_slice(&data);
        let shifted: Vec<f32> = data.iter().map(|&v| v + shift).collect();
        let current = Vector::from_slice(&shifted);

        let detector = DriftDetector::new(DriftConfig::default());
        let status = detector.detect_univariate(&reference, &current);

        if let Some(score) = status.score() {
            prop_assert!(
                score >= 0.0,
                "FALSIFY-DRIFT-003: drift score = {}, expected >= 0.0",
                score
            );
        }
        // NoDrift has no score — that is fine, nothing to assert
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DRIFT-004: Below min_samples → NoDrift
    // Formal: |reference| < min_samples ∨ |current| < min_samples → NoDrift
    // ──────────────────────────────────────────────────────────
    /// Obligation: When either reference or current has fewer samples than
    /// min_samples, the detector must return NoDrift regardless of the data.
    /// This prevents false alarms from insufficient statistical evidence.
    #[test]
    fn prop_below_min_samples_no_drift(
        // Generate small vectors (1 to 29 elements, below default min_samples=30)
        ref_data in proptest::collection::vec(-100.0f32..100.0f32, 1..30),
        cur_data in proptest::collection::vec(-100.0f32..100.0f32, 1..30),
    ) {
        let reference = Vector::from_slice(&ref_data);
        let current = Vector::from_slice(&cur_data);

        // Use default config with min_samples=30
        let detector = DriftDetector::new(DriftConfig::default());
        let status = detector.detect_univariate(&reference, &current);

        prop_assert!(
            matches!(status, DriftStatus::NoDrift),
            "FALSIFY-DRIFT-004: {} ref samples, {} cur samples produced {:?}, expected NoDrift (min_samples=30)",
            ref_data.len(), cur_data.len(), status
        );
    }
}