//! aprender-core 0.31.2
//!
//! Next-generation machine learning library in pure Rust
//! FALSIFY contract tests for Gradient Boosting Classifier
//!
//! Verifies invariants of GradientBoostingClassifier fit/predict
//! using property-based testing (proptest).

// CONTRACT: gbm-v1.yaml
// HASH: sha256:c5d6e7f8091a2b34
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`

use aprender::primitives::Matrix;
use aprender::tree::GradientBoostingClassifier;
use proptest::prelude::*;

proptest! {
    #![proptest_config(ProptestConfig::with_cases(64))]

    // ──────────────────────────────────────────────────────────
    // FALSIFY-GBM-001: Predictions are binary {0, 1}
    // Formal: forall pred in predict(X), pred in {0, 1}
    // ──────────────────────────────────────────────────────────
    /// Obligation: All predictions are binary class labels
    #[test]
    fn prop_predictions_binary(
        n in 30usize..50,
        d in 2usize..4,
        seed in 0u64..10000,
    ) {
        // Deterministic 64-bit LCG so every proptest case is reproducible
        // from `seed` alone; `step` advances the state and exposes the
        // high 31 bits (the low bits of an LCG are low-quality).
        let mut state = seed;
        let mut step = |s: &mut u64| -> u64 {
            *s = s.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            *s >> 33
        };

        // Features first, then labels, drawn from the same LCG stream
        // (same order as generation elsewhere in this file).
        let mut features = Vec::with_capacity(n * d);
        for _ in 0..n * d {
            // Normalize the 31-bit draw to [0, 1], then scale to [-5, 5].
            let raw = step(&mut state);
            features.push((raw as f32 / (u32::MAX >> 1) as f32) * 10.0 - 5.0);
        }
        let mut labels = Vec::with_capacity(n);
        for _ in 0..n {
            labels.push(step(&mut state) as usize % 2);
        }

        let x = Matrix::from_vec(n, d, features).expect("valid matrix dimensions");

        let mut model = GradientBoostingClassifier::new()
            .with_n_estimators(10)
            .with_learning_rate(0.1)
            .with_max_depth(3);
        model.fit(&x, &labels).expect("fit succeeds");
        let predictions = model.predict(&x).expect("predict succeeds");

        // Every prediction must be one of the two class labels.
        for (i, &pred) in predictions.iter().enumerate() {
            prop_assert!(
                matches!(pred, 0 | 1),
                "FALSIFY-GBM-001: prediction[{}]={}, expected 0 or 1", i, pred
            );
        }
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-GBM-002: Predictions are deterministic
    // Formal: predict(X) = predict(X) for same fitted model
    // ──────────────────────────────────────────────────────────
    /// Obligation: Same input produces identical predictions
    #[test]
    fn prop_predictions_deterministic(
        n in 30usize..50,
        d in 2usize..4,
        seed in 0u64..10000,
    ) {
        // Reproducible pseudo-random data via a 64-bit LCG seeded by
        // proptest; `advance` returns the high 31 bits of the new state.
        let mut state = seed;
        let mut advance = || -> u64 {
            state = state.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            state >> 33
        };

        // n*d features in [-5, 5], then n binary labels from the same stream.
        let features: Vec<f32> = (0..n * d)
            .map(|_| (advance() as f32 / (u32::MAX >> 1) as f32) * 10.0 - 5.0)
            .collect();
        let labels: Vec<usize> = (0..n).map(|_| advance() as usize % 2).collect();

        let x = Matrix::from_vec(n, d, features).expect("valid matrix dimensions");

        let mut model = GradientBoostingClassifier::new()
            .with_n_estimators(10)
            .with_learning_rate(0.1)
            .with_max_depth(3);
        model.fit(&x, &labels).expect("fit succeeds");

        // Two back-to-back calls on the same fitted model must agree exactly.
        let first = model.predict(&x).expect("predict succeeds (1st)");
        let second = model.predict(&x).expect("predict succeeds (2nd)");

        prop_assert!(
            first == second,
            "FALSIFY-GBM-002: predictions differ between calls"
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-GBM-003: Well-separated clusters yield accuracy > 0.8
    // Formal: accuracy(predict(X), y) > 0.8 for linearly separable data
    // ──────────────────────────────────────────────────────────
    /// Obligation: GBM achieves high accuracy on well-separated data
    ///
    /// Two clusters centered at (-3, -3) and (3, 3) with noise in
    /// [-0.5, 0.5] are trivially separable, so the fitted model must
    /// score well above chance on its own training data.
    #[test]
    fn prop_separable_data_high_accuracy(
        n_per_class in 15usize..25,
        seed in 0u64..10000,
    ) {
        let n = 2 * n_per_class;

        // Simple LCG for reproducible noise in [-0.5, 0.5].
        // BUGFIX: `rng >> 33` yields a 31-bit value, so normalize by the
        // 31-bit maximum (u32::MAX >> 1) as the other tests in this file do.
        // Dividing by u32::MAX (the 32-bit maximum) capped `val` at ~0.5 and
        // biased the noise into [-0.5, 0.0].
        let mut rng = seed;
        let mut next_noise = || -> f32 {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let val = (rng >> 33) as f32 / (u32::MAX >> 1) as f32;
            val - 0.5
        };

        let mut data = Vec::with_capacity(n * 2);
        let mut labels = Vec::with_capacity(n);

        // Cluster 0 centered at (-3, -3)
        for _ in 0..n_per_class {
            data.push(-3.0 + next_noise());
            data.push(-3.0 + next_noise());
            labels.push(0);
        }
        // Cluster 1 centered at (3, 3)
        for _ in 0..n_per_class {
            data.push(3.0 + next_noise());
            data.push(3.0 + next_noise());
            labels.push(1);
        }

        let x = Matrix::from_vec(n, 2, data).expect("valid matrix dimensions");

        let mut model = GradientBoostingClassifier::new()
            .with_n_estimators(10)
            .with_learning_rate(0.1)
            .with_max_depth(3);
        model.fit(&x, &labels).expect("fit succeeds");
        let predictions = model.predict(&x).expect("predict succeeds");

        // Training-set accuracy: fraction of predictions matching labels.
        let correct: usize = predictions
            .iter()
            .zip(labels.iter())
            .filter(|(&pred, &actual)| pred == actual)
            .count();
        let accuracy = correct as f32 / n as f32;

        prop_assert!(
            accuracy > 0.8,
            "FALSIFY-GBM-003: accuracy={} ({}/{}), expected > 0.8 with well-separated clusters",
            accuracy, correct, n
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-GBM-004: Number of predictions = number of samples
    // Formal: |predict(X)| = |X.rows|
    // ──────────────────────────────────────────────────────────
    /// Obligation: Output length matches input sample count
    #[test]
    fn prop_prediction_count_matches_samples(
        n in 30usize..50,
        d in 2usize..4,
        seed in 0u64..10000,
    ) {
        // LCG-driven synthetic dataset, reproducible from `seed`:
        // n*d features in [-5, 5] followed by n binary labels, all drawn
        // from one stream via `draw` (high 31 bits of the new state).
        let mut lcg = seed;
        let mut draw = || {
            lcg = lcg.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            lcg >> 33
        };

        let mut features = Vec::with_capacity(n * d);
        for _ in 0..n * d {
            features.push((draw() as f32 / (u32::MAX >> 1) as f32) * 10.0 - 5.0);
        }
        let mut labels = Vec::with_capacity(n);
        for _ in 0..n {
            labels.push(draw() as usize % 2);
        }

        let x = Matrix::from_vec(n, d, features).expect("valid matrix dimensions");

        let mut model = GradientBoostingClassifier::new()
            .with_n_estimators(10)
            .with_learning_rate(0.1)
            .with_max_depth(3);
        model.fit(&x, &labels).expect("fit succeeds");
        let predictions = model.predict(&x).expect("predict succeeds");

        prop_assert!(
            predictions.len() == n,
            "FALSIFY-GBM-004: predicted {} samples, expected {} (d={})",
            predictions.len(), n, d
        );
    }
}