aprender-core 0.29.3

Next-generation machine learning library in pure Rust
// CONTRACT: decision-tree-v1.yaml
// HASH: sha256:a2b3c4d5e6f78901
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`

use aprender::primitives::Matrix;
use aprender::tree::{gini_impurity, gini_split, DecisionTreeClassifier};
use proptest::prelude::*;

proptest! {
    #![proptest_config(ProptestConfig::with_cases(256))]

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DT-001: Gini bounded in [0, 1)
    // Formal: gini_impurity(labels) ∈ [0, 1) for all label vectors
    // ──────────────────────────────────────────────────────────
    /// Obligation: Gini impurity is always in [0, 1)
    #[test]
    fn prop_gini_bounded(
        n in 1usize..50,
        k in 2usize..6,
        seed in 0u64..10000,
    ) {
        // Build labels using LCG for deterministic pseudo-randomness
        let mut rng = seed;
        let labels: Vec<usize> = (0..n).map(|_| {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            (rng >> 33) as usize % k
        }).collect();

        let gini = gini_impurity(&labels);

        prop_assert!(
            gini >= 0.0,
            "FALSIFY-DT-001: gini={}, expected >= 0.0", gini
        );
        prop_assert!(
            gini < 1.0,
            "FALSIFY-DT-001: gini={}, expected < 1.0", gini
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DT-002: Gini pure node = 0
    // Formal: gini_impurity([c, c, ..., c]) = 0 for any class c
    // ──────────────────────────────────────────────────────────
    /// Obligation: Pure node has zero impurity
    #[test]
    fn prop_gini_pure_zero(
        n in 1usize..50,
        class in 0usize..10,
    ) {
        let labels = vec![class; n];
        let gini = gini_impurity(&labels);

        prop_assert!(
            gini.abs() < 1e-6,
            "FALSIFY-DT-002: gini_impurity(pure)={}, expected ~0.0", gini
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DT-003: Gini split reduction
    // Formal: gini_split(left, right) <= gini_impurity(all) + epsilon
    // ──────────────────────────────────────────────────────────
    /// Obligation: Weighted split impurity does not exceed parent impurity
    #[test]
    fn prop_gini_split_reduction(
        n in 4usize..50,
        k in 2usize..6,
        seed in 0u64..10000,
    ) {
        // Build labels using LCG
        let mut rng = seed;
        let labels: Vec<usize> = (0..n).map(|_| {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            (rng >> 33) as usize % k
        }).collect();

        // Split at a random point (at least 1 element on each side)
        rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
        let split_point = 1 + ((rng >> 33) as usize % (n - 2)).max(0);

        let left = &labels[..split_point];
        let right = &labels[split_point..];

        let parent_gini = gini_impurity(&labels);
        let split_gini = gini_split(left, right);

        let eps = 1e-6;
        prop_assert!(
            split_gini <= parent_gini + eps,
            "FALSIFY-DT-003: gini_split={} > gini_impurity(all)={} + eps",
            split_gini, parent_gini
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DT-004: Gini split bounded
    // Formal: gini_split(left, right) ∈ [0, 1) for random splits
    // ──────────────────────────────────────────────────────────
    /// Obligation: Weighted Gini split is bounded in [0, 1)
    #[test]
    fn prop_gini_split_bounded(
        n in 4usize..50,
        k in 2usize..6,
        seed in 0u64..10000,
    ) {
        // Build labels using LCG
        let mut rng = seed;
        let labels: Vec<usize> = (0..n).map(|_| {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            (rng >> 33) as usize % k
        }).collect();

        // Split at a random point
        rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
        let split_point = 1 + ((rng >> 33) as usize % (n - 2)).max(0);

        let left = &labels[..split_point];
        let right = &labels[split_point..];

        let split = gini_split(left, right);

        prop_assert!(
            split >= 0.0,
            "FALSIFY-DT-004: gini_split={}, expected >= 0.0", split
        );
        prop_assert!(
            split < 1.0,
            "FALSIFY-DT-004: gini_split={}, expected < 1.0", split
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DT-005: Gini maximum at uniform distribution
    // Formal: gini_impurity(uniform K) ≈ 1 - 1/K
    // ──────────────────────────────────────────────────────────
    /// Obligation: Uniform distribution achieves theoretical maximum impurity
    #[test]
    fn prop_gini_max_at_uniform(
        k in 2usize..7,
        repeats in 1usize..20,
    ) {
        // Build perfectly uniform labels: [0, 1, ..., k-1] repeated
        let labels: Vec<usize> = (0..k).cycle().take(k * repeats).collect();

        let gini = gini_impurity(&labels);
        let expected = 1.0 - 1.0 / (k as f32);

        prop_assert!(
            (gini - expected).abs() < 1e-5,
            "FALSIFY-DT-005: gini_impurity(uniform K={})={}, expected ~{}",
            k, gini, expected
        );
    }

    // ──────────────────────────────────────────────────────────
    // FALSIFY-DT-006: Fit-predict class range
    // Formal: ∀ pred ∈ predict(X), pred ∈ {training classes}
    // ──────────────────────────────────────────────────────────
    /// Obligation: Predictions are always within the set of training classes
    #[test]
    fn prop_fit_predict_class_range(
        n in 10usize..30,
        k in 2usize..4,
        seed in 0u64..10000,
    ) {
        let n_features = 2;

        // Build pseudo-random feature data using LCG
        let mut rng = seed;
        let mut features = Vec::with_capacity(n * n_features);
        for _ in 0..(n * n_features) {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let v = ((rng >> 33) as f32 / (u32::MAX >> 1) as f32) * 10.0 - 5.0;
            features.push(v);
        }

        // Build labels in [0, k)
        let labels: Vec<usize> = (0..n).map(|_| {
            rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            (rng >> 33) as usize % k
        }).collect();

        let x = Matrix::from_vec(n, n_features, features).expect("valid dimensions");

        let mut tree = DecisionTreeClassifier::new().with_max_depth(5);
        tree.fit(&x, &labels).expect("fit succeeds");

        let predictions = tree.predict(&x);

        // Collect the set of training classes
        let mut train_classes: Vec<usize> = labels.clone();
        train_classes.sort_unstable();
        train_classes.dedup();

        prop_assert!(
            predictions.len() == n,
            "FALSIFY-DT-006: predicted {} samples, expected {}",
            predictions.len(), n
        );

        for (i, &pred) in predictions.iter().enumerate() {
            prop_assert!(
                train_classes.contains(&pred),
                "FALSIFY-DT-006: prediction[{}]={}, not in training classes {:?}",
                i, pred, train_classes
            );
        }
    }
}