1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// CONTRACT: adamw-kernel-v1.yaml
// HASH: sha256:b2c3d4e5f6a78901
// Generated by: pv probar --binding
// DO NOT EDIT — regenerate with `pv probar --binding`
use proptest::prelude::*;
proptest! {
/// Obligation: Decoupled weight decay (invariant)
/// Formal: weight_decay applied to theta directly, not gradient
///
/// AdamW applies: theta -= lr * wd * theta (then Adam step)
/// This test verifies the decoupling by checking that with zero gradient,
/// the parameter still shrinks by lr * wd * theta.
#[test]
fn prop_decoupled_weight_decay(
theta in 0.1f32..10.0,
wd in 0.001f32..0.1,
lr in 0.001f32..0.01
) {
// After one step with zero gradient:
// theta_new = theta - lr * wd * theta = theta * (1 - lr * wd)
let expected = theta * (1.0 - lr * wd);
prop_assert!(
expected < theta,
"weight decay should shrink: expected={expected} < theta={theta}"
);
prop_assert!(
expected > 0.0,
"should remain positive: expected={expected}"
);
}
/// Obligation: Second moment non-negative (bound)
/// Formal: v_t >= 0 for all t
#[test]
fn prop_second_moment_non_negative(
grad in -100.0f32..100.0,
beta2 in 0.9f32..0.9999
) {
// v_t = beta2 * v_{t-1} + (1 - beta2) * g^2
// Starting from v_0 = 0:
let v_1 = (1.0 - beta2) * grad * grad;
prop_assert!(
v_1 >= 0.0,
"v_1={v_1}, expected >= 0"
);
// After second step:
let v_2 = beta2 * v_1 + (1.0 - beta2) * grad * grad;
prop_assert!(
v_2 >= 0.0,
"v_2={v_2}, expected >= 0"
);
}
/// Obligation: Bias correction factor >= 1 for t >= 1 (bound)
/// Formal: 1/(1 - β^t) >= 1 for β ∈ (0,1), t >= 1
/// Note: For small β and large t, β^t underflows to 0 in f32,
/// yielding correction = exactly 1.0. The strict > 1 only holds
/// when β^t is representable.
#[test]
fn prop_bias_correction_factor(
beta in 0.5f32..0.9999,
t in 1u32..100
) {
let correction = 1.0 / (1.0 - beta.powi(t as i32));
prop_assert!(
correction >= 1.0,
"bias correction={}, expected >= 1 for beta={}, t={}",
correction, beta, t
);
}
/// Obligation: Finite gradients produce finite update (invariant)
/// Formal: finite(grad) → finite(update)
#[test]
fn prop_update_finite(
grad in -10.0f32..10.0,
beta1 in 0.8f32..0.99,
beta2 in 0.99f32..0.9999,
lr in 0.0001f32..0.01
) {
let eps = 1e-8f32;
// Simulate one AdamW step
let m = (1.0 - beta1) * grad;
let v = (1.0 - beta2) * grad * grad;
let m_hat = m / (1.0 - beta1);
let v_hat = v / (1.0 - beta2);
let update = lr * m_hat / (v_hat.sqrt() + eps);
prop_assert!(
update.is_finite(),
"update={update} is not finite for grad={grad}"
);
}
/// Obligation: SIMD matches scalar within ULP (equivalence)
#[test]
#[ignore = "SIMD equivalence — trueno domain"]
fn prop_simd_equivalence(
_x in proptest::collection::vec(-10.0f32..10.0, 1..32usize)
) {
// SIMD equivalence testing is trueno's responsibility
}
}