// aprender-core 0.31.2

// Next-generation machine learning library in pure Rust
// CONTRACT: preprocessing-normalization-v1.yaml
// HASH: sha256:a1b2c3d4e5f6a7b8
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`

use aprender::preprocessing::{MinMaxScaler, RobustScaler, StandardScaler};
use aprender::primitives::Matrix;
use aprender::traits::Transformer;
use proptest::prelude::*;

proptest! {
    #![proptest_config(ProptestConfig::with_cases(256))]

    /// FALSIFY-PP-001: StandardScaler zero mean (invariant)
    /// Formal: |mean(StandardScaler(X)_j)| < ε for each column j
    ///
    /// Builds an `n × d` matrix from the generated samples, fits a
    /// `StandardScaler` on it, and asserts that every column of the
    /// transformed matrix has a mean within 1e-4 of zero.
    #[test]
    fn prop_standard_scaler_zero_mean(
        n in 5usize..20,
        d in 2usize..4,
        data in proptest::collection::vec(-100.0f32..100.0, 80usize)
    ) {
        // Draw exactly n * d cells from the generated sample pool.
        let cells: Vec<f32> = data.iter().cycle().take(n * d).copied().collect();
        let x = Matrix::from_vec(n, d, cells).expect("valid matrix dimensions");

        let mut scaler = StandardScaler::new();
        scaler.fit(&x).expect("fit succeeds");
        let scaled = scaler.transform(&x).expect("transform succeeds");

        let (n_rows, n_cols) = scaled.shape();
        for col in 0..n_cols {
            let col_sum: f32 = (0..n_rows).map(|row| scaled.get(row, col)).sum();
            let col_mean = col_sum / n_rows as f32;
            prop_assert!(
                col_mean.abs() < 1e-4,
                "column {} mean = {}, expected ~0.0",
                col, col_mean
            );
        }
    }

    /// FALSIFY-PP-002: StandardScaler unit variance (invariant)
    /// Formal: |std(StandardScaler(X)_j) - 1.0| < ε for each non-constant column j
    ///
    /// Cases in which any input column holds a single repeated value are
    /// rejected via `prop_assume!`. The standard deviation checked here is
    /// the population form (sum of squared deviations divided by the row
    /// count), compared against 1.0 with a tolerance of 1e-3.
    #[test]
    fn prop_standard_scaler_unit_variance(
        n in 5usize..20,
        d in 2usize..4,
        data in proptest::collection::vec(-100.0f32..100.0, 80usize)
    ) {
        let total = n * d;
        let vals: Vec<f32> = data.iter().copied().cycle().take(total).collect();
        let x = Matrix::from_vec(n, d, vals).expect("valid matrix dimensions");

        // Skip if any column is constant (all identical values).
        // Cells are read through `Matrix::get` so the guard does not depend on
        // how `from_vec` lays out the buffer, and no copy of `vals` is needed.
        for j in 0..d {
            let first = x.get(0, j);
            let all_same = (1..n).all(|i| (x.get(i, j) - first).abs() < 1e-10);
            prop_assume!(!all_same);
        }

        let mut scaler = StandardScaler::new();
        scaler.fit(&x).expect("fit succeeds");
        let transformed = scaler.transform(&x).expect("transform succeeds");

        let (rows, cols) = transformed.shape();
        for j in 0..cols {
            let col_mean: f32 = (0..rows).map(|i| transformed.get(i, j)).sum::<f32>()
                / rows as f32;
            // Population variance: mean of squared deviations from the column mean.
            let col_var: f32 = (0..rows)
                .map(|i| {
                    let diff = transformed.get(i, j) - col_mean;
                    diff * diff
                })
                .sum::<f32>()
                / rows as f32;
            let col_std = col_var.sqrt();
            prop_assert!(
                (col_std - 1.0).abs() < 1e-3,
                "column {} std = {}, expected ~1.0",
                j, col_std
            );
        }
    }

    /// FALSIFY-PP-003: MinMaxScaler bounded output (bound)
    /// Formal: 0 - ε <= MinMaxScaler(X)_{i,j} <= 1 + ε for all i,j
    ///
    /// Fits a `MinMaxScaler` on the generated matrix and asserts that every
    /// transformed cell lies in `[0 - 1e-6, 1 + 1e-6]`.
    #[test]
    fn prop_minmax_scaler_bounded(
        n in 5usize..20,
        d in 2usize..4,
        data in proptest::collection::vec(-100.0f32..100.0, 80usize)
    ) {
        // Draw exactly n * d cells from the generated sample pool.
        let cells: Vec<f32> = data.iter().cycle().take(n * d).copied().collect();
        let x = Matrix::from_vec(n, d, cells).expect("valid matrix dimensions");

        let mut scaler = MinMaxScaler::new();
        scaler.fit(&x).expect("fit succeeds");
        let scaled = scaler.transform(&x).expect("transform succeeds");

        // Accepted interval, widened by a small slack on both ends.
        let slack: f32 = 1e-6;
        let lower = 0.0 - slack;
        let upper = 1.0 + slack;

        let (n_rows, n_cols) = scaled.shape();
        for row in 0..n_rows {
            for col in 0..n_cols {
                let cell = scaled.get(row, col);
                prop_assert!(
                    (lower..=upper).contains(&cell),
                    "transformed[{}, {}] = {}, expected in [0, 1]",
                    row, col, cell
                );
            }
        }
    }

    /// FALSIFY-PP-004: MinMaxScaler maps extremes correctly (invariant)
    /// Formal: min(X_j) -> 0.0, max(X_j) -> 1.0 for each column j
    ///
    /// Cases in which any input column has equal minimum and maximum are
    /// rejected via `prop_assume!`. For the remaining cases the smallest and
    /// largest transformed value of each column must be within 1e-5 of 0.0
    /// and 1.0 respectively.
    #[test]
    fn prop_minmax_scaler_extremes(
        n in 5usize..20,
        d in 2usize..4,
        data in proptest::collection::vec(-100.0f32..100.0, 80usize)
    ) {
        let total = n * d;
        let vals: Vec<f32> = data.iter().copied().cycle().take(total).collect();
        let x = Matrix::from_vec(n, d, vals).expect("valid matrix dimensions");

        // Skip if any column is constant (min == max).
        // Cells are read through `Matrix::get` so the guard does not depend on
        // how `from_vec` lays out the buffer, and no copy of `vals` is needed.
        for j in 0..d {
            let col_min = (0..n).map(|i| x.get(i, j)).fold(f32::INFINITY, f32::min);
            let col_max = (0..n).map(|i| x.get(i, j)).fold(f32::NEG_INFINITY, f32::max);
            prop_assume!((col_max - col_min).abs() > 1e-10);
        }

        let mut scaler = MinMaxScaler::new();
        scaler.fit(&x).expect("fit succeeds");
        let transformed = scaler.transform(&x).expect("transform succeeds");

        let (rows, cols) = transformed.shape();
        for j in 0..cols {
            // Extremes of the transformed column, found by folding over its cells.
            let col_min = (0..rows)
                .map(|i| transformed.get(i, j))
                .fold(f32::INFINITY, f32::min);
            let col_max = (0..rows)
                .map(|i| transformed.get(i, j))
                .fold(f32::NEG_INFINITY, f32::max);
            prop_assert!(
                col_min.abs() < 1e-5,
                "column {} min = {}, expected 0.0",
                j, col_min
            );
            prop_assert!(
                (col_max - 1.0).abs() < 1e-5,
                "column {} max = {}, expected 1.0",
                j, col_max
            );
        }
    }

    /// FALSIFY-PP-005: StandardScaler inverse roundtrip (equivalence)
    /// Formal: ||inverse_transform(transform(X)) - X||_∞ < ε
    ///
    /// Every cell recovered by `inverse_transform` must lie within 1e-3 of
    /// the corresponding cell of the input matrix.
    #[test]
    fn prop_standard_scaler_inverse_roundtrip(
        n in 5usize..20,
        d in 2usize..4,
        data in proptest::collection::vec(-100.0f32..100.0, 80usize)
    ) {
        // Draw exactly n * d cells from the generated sample pool.
        let cells: Vec<f32> = data.iter().cycle().take(n * d).copied().collect();
        let x = Matrix::from_vec(n, d, cells).expect("valid matrix dimensions");

        let mut scaler = StandardScaler::new();
        scaler.fit(&x).expect("fit succeeds");
        let scaled = scaler.transform(&x).expect("transform succeeds");
        let restored = scaler
            .inverse_transform(&scaled)
            .expect("inverse_transform succeeds");

        let (n_rows, n_cols) = x.shape();
        for row in 0..n_rows {
            for col in 0..n_cols {
                let expected = x.get(row, col);
                let actual = restored.get(row, col);
                let err = (expected - actual).abs();
                prop_assert!(
                    err < 1e-3,
                    "roundtrip error at [{}, {}]: orig={}, recovered={}, diff={}",
                    row, col, expected, actual, err
                );
            }
        }
    }

    /// FALSIFY-PP-007: RobustScaler output is finite and centered near 0
    /// Formal: all transformed values finite; median of each column ~0 after centering
    ///
    /// Two checks run in sequence: first every transformed cell must be
    /// finite, then the median of each transformed column must be within
    /// 1e-3 of zero.
    #[test]
    fn prop_robust_scaler_finite_centered(
        n in 5usize..20,
        d in 2usize..4,
        data in proptest::collection::vec(-100.0f32..100.0, 80usize)
    ) {
        // Draw exactly n * d cells from the generated sample pool.
        let cells: Vec<f32> = data.iter().cycle().take(n * d).copied().collect();
        let x = Matrix::from_vec(n, d, cells).expect("valid matrix dimensions");

        let mut scaler = RobustScaler::new();
        scaler.fit(&x).expect("fit succeeds");
        let scaled = scaler.transform(&x).expect("transform succeeds");

        let (n_rows, n_cols) = scaled.shape();

        // Finite check, visiting cells row by row.
        for (row, col) in (0..n_rows).flat_map(|r| (0..n_cols).map(move |c| (r, c))) {
            let cell = scaled.get(row, col);
            prop_assert!(
                cell.is_finite(),
                "RobustScaler output[{}, {}] = {}, expected finite",
                row, col, cell
            );
        }

        // Median check: sort each column and take the middle element, or the
        // average of the two middle elements when the row count is even.
        for col in 0..n_cols {
            let mut sorted: Vec<f32> = (0..n_rows).map(|row| scaled.get(row, col)).collect();
            sorted.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap_or(std::cmp::Ordering::Equal));
            let mid = sorted.len() / 2;
            let median = if sorted.len().is_multiple_of(2) {
                (sorted[mid - 1] + sorted[mid]) / 2.0
            } else {
                sorted[mid]
            };
            prop_assert!(
                median.abs() < 1e-3,
                "column {} median = {}, expected ~0.0 after robust centering",
                col, median
            );
        }
    }

    /// FALSIFY-PP-006: MinMaxScaler inverse roundtrip (equivalence)
    /// Formal: ||inverse_transform(transform(X)) - X||_∞ < ε
    ///
    /// Cases in which any input column has equal minimum and maximum are
    /// rejected via `prop_assume!`. For the remaining cases every cell
    /// recovered by `inverse_transform` must lie within 1e-3 of the
    /// corresponding input cell.
    #[test]
    fn prop_minmax_scaler_inverse_roundtrip(
        n in 5usize..20,
        d in 2usize..4,
        data in proptest::collection::vec(-100.0f32..100.0, 80usize)
    ) {
        let total = n * d;
        let vals: Vec<f32> = data.iter().copied().cycle().take(total).collect();
        let x = Matrix::from_vec(n, d, vals).expect("valid matrix dimensions");

        // Skip if any column is constant (inverse is degenerate).
        // Cells are read through `Matrix::get` so the guard does not depend on
        // how `from_vec` lays out the buffer, and no copy of `vals` is needed.
        for j in 0..d {
            let col_min = (0..n).map(|i| x.get(i, j)).fold(f32::INFINITY, f32::min);
            let col_max = (0..n).map(|i| x.get(i, j)).fold(f32::NEG_INFINITY, f32::max);
            prop_assume!((col_max - col_min).abs() > 1e-10);
        }

        let mut scaler = MinMaxScaler::new();
        scaler.fit(&x).expect("fit succeeds");
        let transformed = scaler.transform(&x).expect("transform succeeds");
        let recovered = scaler.inverse_transform(&transformed).expect("inverse_transform succeeds");

        let (rows, cols) = x.shape();
        for i in 0..rows {
            for j in 0..cols {
                let orig = x.get(i, j);
                let rec = recovered.get(i, j);
                let diff = (orig - rec).abs();
                prop_assert!(
                    diff < 1e-3,
                    "roundtrip error at [{}, {}]: orig={}, recovered={}, diff={}",
                    i, j, orig, rec, diff
                );
            }
        }
    }
}