// aprender-core 0.31.2
//
// Next-generation machine learning library in pure Rust
// CONTRACT: adamw-kernel-v1.yaml
// HASH: sha256:b2c3d4e5f6a78901
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`

use proptest::prelude::*;

proptest! {
    /// Obligation: Decoupled weight decay (invariant)
    /// Formal: weight_decay applied to theta directly, not gradient
    ///
    /// AdamW applies: theta -= lr * wd * theta (then Adam step)
    /// This test verifies the decoupling by checking that with zero gradient,
    /// the parameter still shrinks by lr * wd * theta.
    #[test]
    fn prop_decoupled_weight_decay(
        theta in 0.1f32..10.0,
        wd in 0.001f32..0.1,
        lr in 0.001f32..0.01
    ) {
        // After one step with zero gradient:
        // theta_new = theta - lr * wd * theta = theta * (1 - lr * wd)
        let expected = theta * (1.0 - lr * wd);
        // Strict inequality is safe in f32: lr * wd >= 1e-6, i.e. ~8 ULPs of
        // relative decrement, so the product cannot round back up to theta.
        prop_assert!(
            expected < theta,
            "weight decay should shrink: expected={expected} < theta={theta}"
        );
        // lr * wd <= 1e-3 < 1, so the multiplier (1 - lr * wd) stays positive.
        prop_assert!(
            expected > 0.0,
            "should remain positive: expected={expected}"
        );
    }

    /// Obligation: Second moment non-negative (bound)
    /// Formal: v_t >= 0 for all t
    ///
    /// v is an EWMA of squared gradients; both the decay term and the
    /// (1 - beta2) * g^2 term are non-negative, so v can never go negative.
    #[test]
    fn prop_second_moment_non_negative(
        grad in -100.0f32..100.0,
        beta2 in 0.9f32..0.9999
    ) {
        // v_t = beta2 * v_{t-1} + (1 - beta2) * g^2
        // Starting from v_0 = 0:
        let v_1 = (1.0 - beta2) * grad * grad;
        prop_assert!(
            v_1 >= 0.0,
            "v_1={v_1}, expected >= 0"
        );

        // After second step: sum of two non-negative terms.
        let v_2 = beta2 * v_1 + (1.0 - beta2) * grad * grad;
        prop_assert!(
            v_2 >= 0.0,
            "v_2={v_2}, expected >= 0"
        );
    }

    /// Obligation: Bias correction factor >= 1 for t >= 1 (bound)
    /// Formal: 1/(1 - β^t) >= 1 for β ∈ (0,1), t >= 1
    /// Note: For small β and large t, β^t underflows to 0 in f32,
    /// yielding correction = exactly 1.0. The strict > 1 only holds
    /// when β^t is representable.
    #[test]
    fn prop_bias_correction_factor(
        beta in 0.5f32..0.9999,
        t in 1u32..100
    ) {
        // Cast is lossless: t < 100 fits comfortably in i32.
        // Worst case beta^t = 0.5^99 ≈ 1.6e-30 is still a normal f32,
        // so the denominator stays in (0, 1] and the division is safe.
        let correction = 1.0 / (1.0 - beta.powi(t as i32));
        prop_assert!(
            correction >= 1.0,
            "bias correction={correction}, expected >= 1 for beta={beta}, t={t}"
        );
    }

    /// Obligation: Finite gradients produce finite update (invariant)
    /// Formal: finite(grad) → finite(update)
    #[test]
    fn prop_update_finite(
        grad in -10.0f32..10.0,
        beta1 in 0.8f32..0.99,
        beta2 in 0.99f32..0.9999,
        lr in 0.0001f32..0.01
    ) {
        let eps = 1e-8f32;
        // Simulate one AdamW step from zero state (m_0 = v_0 = 0).
        // Algebraically m_hat == grad and v_hat == grad^2 at t = 1; the
        // test keeps the full formulas to mirror the kernel's data flow.
        let m = (1.0 - beta1) * grad;
        let v = (1.0 - beta2) * grad * grad;
        let m_hat = m / (1.0 - beta1);
        let v_hat = v / (1.0 - beta2);
        // Denominator >= eps > 0, so the quotient is always well-defined.
        let update = lr * m_hat / (v_hat.sqrt() + eps);

        prop_assert!(
            update.is_finite(),
            "update={update} is not finite for grad={grad}"
        );
    }

    /// Obligation: SIMD matches scalar within ULP (equivalence)
    #[test]
    #[ignore = "SIMD equivalence — trueno domain"]
    fn prop_simd_equivalence(
        _x in proptest::collection::vec(-10.0f32..10.0, 1..32usize)
    ) {
        // SIMD equivalence testing is trueno's responsibility
    }
}