aprender-core 0.30.0

Next-generation machine learning library in pure Rust
//! FALSIFY contract tests for Random Forest Regressor.
//!
//! Verifies structural and statistical invariants of the ensemble:
//! - Prediction count matches input count
//! - Numerical stability (finite outputs)
//! - Deterministic predictions with fixed seed
//! - Prediction range bounded by training targets

// CONTRACT: random-forest-v1.yaml
// HASH: sha256:e5f6a7b8c9d01234
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`

use aprender::primitives::{Matrix, Vector};
use aprender::tree::RandomForestRegressor;
use proptest::prelude::*;

proptest! {
    #![proptest_config(ProptestConfig::with_cases(128))]

    // ──────────────────────────────────────────────────────────
    // FALSIFY-RF-001: Number of predictions = number of input samples
    // Formal: |predict(X)| = |X.rows|
    // ──────────────────────────────────────────────────────────
    /// Obligation: Prediction vector length equals number of input samples
    #[test]
    fn prop_prediction_count_matches_samples(
        n in 30usize..50,
        d in 2usize..4,
        seed in 0u64..10000,
    ) {
        // Generate features via LCG
        let mut rng = seed;
        let mut features = Vec::with_capacity(n * d);
        for _ in 0..(n * d) {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 20.0 - 10.0;
            features.push(val);
        }

        // Generate targets
        let mut targets = Vec::with_capacity(n);
        for _ in 0..n {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 100.0 - 50.0;
            targets.push(val);
        }

        let x = Matrix::from_vec(n, d, features).expect("valid dimensions");
        let y = Vector::from_slice(&targets);

        let mut model = RandomForestRegressor::new(10)
            .with_max_depth(5)
            .with_random_state(42);
        model.fit(&x, &y).expect("fit succeeds");

        let preds = model.predict(&x);

        prop_assert!(
            preds.len() == n,
            "FALSIFY-RF-001: predicted {} samples, expected {}",
            preds.len(), n
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-RF-002: Predictions are finite (no NaN/Inf)
    // Formal: forall i, predict(X)[i] is finite
    // ──────────────────────────────────────────────────────────
    /// Obligation: All predictions are finite real numbers
    #[test]
    fn prop_predictions_finite(
        n in 30usize..50,
        d in 2usize..4,
        seed in 0u64..10000,
    ) {
        // Generate features via LCG
        let mut rng = seed;
        let mut features = Vec::with_capacity(n * d);
        for _ in 0..(n * d) {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 20.0 - 10.0;
            features.push(val);
        }

        // Generate targets
        let mut targets = Vec::with_capacity(n);
        for _ in 0..n {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 100.0 - 50.0;
            targets.push(val);
        }

        let x = Matrix::from_vec(n, d, features).expect("valid dimensions");
        let y = Vector::from_slice(&targets);

        let mut model = RandomForestRegressor::new(10)
            .with_max_depth(5)
            .with_random_state(42);
        model.fit(&x, &y).expect("fit succeeds");

        let preds = model.predict(&x);

        for i in 0..preds.len() {
            prop_assert!(
                preds.as_slice()[i].is_finite(),
                "FALSIFY-RF-002: prediction[{}]={}, expected finite",
                i, preds.as_slice()[i]
            );
        }
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-RF-003: Predictions are deterministic with same seed
    // Formal: fit(seed=s) + predict(X) = fit(seed=s) + predict(X)
    // ──────────────────────────────────────────────────────────
    /// Obligation: Same seed produces identical predictions
    #[test]
    fn prop_predictions_deterministic(
        n in 30usize..50,
        d in 2usize..4,
        seed in 0u64..10000,
    ) {
        // Generate features via LCG
        let mut rng = seed;
        let mut features = Vec::with_capacity(n * d);
        for _ in 0..(n * d) {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 20.0 - 10.0;
            features.push(val);
        }

        // Generate targets
        let mut targets = Vec::with_capacity(n);
        for _ in 0..n {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 100.0 - 50.0;
            targets.push(val);
        }

        let x = Matrix::from_vec(n, d, features).expect("valid dimensions");
        let y = Vector::from_slice(&targets);

        // First run
        let mut model1 = RandomForestRegressor::new(10)
            .with_max_depth(5)
            .with_random_state(42);
        model1.fit(&x, &y).expect("fit succeeds");
        let preds1 = model1.predict(&x);

        // Second run with same seed
        let mut model2 = RandomForestRegressor::new(10)
            .with_max_depth(5)
            .with_random_state(42);
        model2.fit(&x, &y).expect("fit succeeds");
        let preds2 = model2.predict(&x);

        prop_assert!(
            preds1.len() == preds2.len(),
            "FALSIFY-RF-003: prediction lengths differ: {} vs {}",
            preds1.len(), preds2.len()
        );

        for i in 0..preds1.len() {
            prop_assert!(
                (preds1.as_slice()[i] - preds2.as_slice()[i]).abs() < 1e-10,
                "FALSIFY-RF-003: prediction[{}] differs: {} vs {} (same seed=42)",
                i, preds1.as_slice()[i], preds2.as_slice()[i]
            );
        }
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-RF-004: Predictions within reasonable range of training targets
    // Formal: min(y) - margin <= predict(X)[i] <= max(y) + margin
    //         where margin = (max(y) - min(y)) * 0.5
    // ──────────────────────────────────────────────────────────
    /// Obligation: Predictions bounded by training target range (with margin)
    #[test]
    fn prop_predictions_bounded(
        n in 30usize..50,
        d in 2usize..4,
        seed in 0u64..10000,
    ) {
        // Generate features via LCG
        let mut rng = seed;
        let mut features = Vec::with_capacity(n * d);
        for _ in 0..(n * d) {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 20.0 - 10.0;
            features.push(val);
        }

        // Generate targets
        let mut targets = Vec::with_capacity(n);
        for _ in 0..n {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 100.0 - 50.0;
            targets.push(val);
        }

        let x = Matrix::from_vec(n, d, features).expect("valid dimensions");
        let y = Vector::from_slice(&targets);

        let mut model = RandomForestRegressor::new(10)
            .with_max_depth(5)
            .with_random_state(42);
        model.fit(&x, &y).expect("fit succeeds");

        let preds = model.predict(&x);

        // Compute target range
        let y_min = targets.iter().cloned().fold(f32::INFINITY, f32::min);
        let y_max = targets.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
        let range = y_max - y_min;
        // RF predictions are averages of tree leaf values, so they should
        // stay within the training target range (trees can only predict
        // values seen in training). Allow 50% margin for averaging effects.
        let margin = range * 0.5;
        let lower = y_min - margin;
        let upper = y_max + margin;

        for i in 0..preds.len() {
            let p = preds.as_slice()[i];
            prop_assert!(
                p >= lower && p <= upper,
                "FALSIFY-RF-004: prediction[{}]={} outside [{}, {}] (y_range=[{}, {}])",
                i, p, lower, upper, y_min, y_max
            );
        }
    }

    /// Obligation: SIMD matches scalar within ULP (equivalence)
    #[test]
    #[ignore = "SIMD equivalence — trueno domain"]
    fn prop_simd_equivalence(
        _x in proptest::collection::vec(-10.0f32..10.0, 1..32usize)
    ) {
        // SIMD equivalence testing is trueno's responsibility
    }
}