Skip to main content

entrenar/optim/convergence_tests/
adam_tests.rs

//! Adam optimizer convergence tests
2
3#[cfg(test)]
4mod tests {
5    use super::super::helpers::*;
6    use crate::optim::*;
7    use crate::Tensor;
8    use proptest::prelude::*;
9    use proptest::test_runner::Config;
10
11    proptest! {
12        #[test]
13        fn prop_adam_converges_quadratic(
14            lr in 0.05f32..0.5
15        ) {
16            let optimizer = Adam::default_params(lr);
17            prop_assert!(test_quadratic_convergence(optimizer, 100, 1.5));
18        }
19
20        #[test]
21        fn prop_adam_loss_decreases(
22            lr in 0.01f32..0.3
23        ) {
24            let optimizer = Adam::default_params(lr);
25            prop_assert!(test_loss_decreases(optimizer, 30));
26        }
27    }
28
    // ========================================================================
    // EXTENDED PROPERTY TESTS - High iteration counts for quality validation
    // ========================================================================
32
33    proptest! {
34        #![proptest_config(Config::with_cases(1000))]
35
36        #[test]
37        fn prop_adam_ill_conditioned(
38            lr in 0.05f32..0.2,
39            beta1 in 0.85f32..0.95,
40            beta2 in 0.99f32..0.999
41        ) {
42            let optimizer = Adam::new(lr, beta1, beta2, 1e-8);
43            // Relaxed threshold - ill-conditioned problems are hard
44            prop_assert!(test_ill_conditioned_convergence(optimizer, 300, 10.0));
45        }
46
47        #[test]
48        fn prop_adam_high_dim(
49            lr in 0.1f32..0.25,
50            dim in 10usize..30
51        ) {
52            let optimizer = Adam::default_params(lr);
53            prop_assert!(test_high_dim_convergence(optimizer, dim, 200, 3.0));
54        }
55
56        #[test]
57        fn prop_numerical_stability_adam(
58            lr in 0.001f32..0.5,
59            beta1 in 0.5f32..0.99,
60            beta2 in 0.9f32..0.9999
61        ) {
62            let optimizer = Adam::new(lr, beta1, beta2, 1e-8);
63            prop_assert!(test_small_gradient_stability(optimizer));
64        }
65
66        #[test]
67        fn prop_random_init_adam(
68            init in prop::collection::vec(-50.0f32..50.0, 4),
69            lr in 0.1f32..0.25
70        ) {
71            let mut params = vec![Tensor::from_vec(init.clone(), true)];
72            let mut optimizer = Adam::default_params(lr);
73            let initial_norm: f32 = init.iter().map(|x| x * x).sum();
74
75            for _ in 0..150 {
76                let grad = params[0].data().mapv(|x| 2.0 * x);
77                params[0].set_grad(grad);
78                optimizer.step(&mut params);
79            }
80
81            // Should make progress (reduce norm)
82            let final_norm: f32 = params[0].data().iter().map(|x| x * x).sum();
83            prop_assert!(final_norm < initial_norm.max(100.0));
84        }
85    }
86
    // ========================================================================
    // DETERMINISTIC CONVERGENCE TESTS
    // ========================================================================
90
91    #[test]
92    fn test_adam_rosenbrock_progress() {
93        let mut optimizer = Adam::new(0.01, 0.9, 0.999, 1e-8);
94        let mut params = vec![Tensor::from_vec(vec![-1.0, 1.0], true)];
95        let a = 1.0f32;
96        let b = 100.0f32;
97
98        let initial_loss = {
99            let x = params[0].data()[0];
100            let y = params[0].data()[1];
101            (a - x).powi(2) + b * (y - x * x).powi(2)
102        };
103
104        for _ in 0..1000 {
105            let x = params[0].data()[0];
106            let y = params[0].data()[1];
107            let dx = -2.0 * (a - x) - 4.0 * b * x * (y - x * x);
108            let dy = 2.0 * b * (y - x * x);
109            params[0].set_grad(ndarray::arr1(&[dx, dy]));
110            optimizer.step(&mut params);
111        }
112
113        let final_loss = {
114            let x = params[0].data()[0];
115            let y = params[0].data()[1];
116            (a - x).powi(2) + b * (y - x * x).powi(2)
117        };
118
119        // Should make progress
120        assert!(final_loss < initial_loss);
121    }
122
123    #[test]
124    fn test_adam_beta_params_effect() {
125        // Test that Adam with different beta2 affects update stability
126        // Higher beta2 = more smoothing of second moment = more stable updates
127        let mut params_high_beta2 = vec![Tensor::from_vec(vec![10.0], true)];
128        let mut params_low_beta2 = vec![Tensor::from_vec(vec![10.0], true)];
129
130        let mut opt_high = Adam::new(0.1, 0.9, 0.999, 1e-8);
131        let mut opt_low = Adam::new(0.1, 0.9, 0.9, 1e-8);
132
133        // Run for several steps
134        for _ in 0..20 {
135            let grad_h = ndarray::arr1(&[2.0 * params_high_beta2[0].data()[0]]);
136            let grad_l = ndarray::arr1(&[2.0 * params_low_beta2[0].data()[0]]);
137            params_high_beta2[0].set_grad(grad_h);
138            params_low_beta2[0].set_grad(grad_l);
139            opt_high.step(&mut params_high_beta2);
140            opt_low.step(&mut params_low_beta2);
141        }
142
143        // Both should converge (neither should be NaN/Inf)
144        assert!(params_high_beta2[0].data()[0].is_finite());
145        assert!(params_low_beta2[0].data()[0].is_finite());
146
147        // Both should make progress toward 0
148        assert!(params_high_beta2[0].data()[0].abs() < 10.0);
149        assert!(params_low_beta2[0].data()[0].abs() < 10.0);
150    }
151
152    #[test]
153    fn test_optimizer_state_persistence() {
154        // Test that optimizer state (momentum, m/v) persists correctly
155        let mut params = vec![Tensor::from_vec(vec![10.0], true)];
156        let mut adam = Adam::default_params(0.1);
157
158        // Run some steps
159        for _ in 0..10 {
160            params[0].set_grad(ndarray::arr1(&[2.0 * params[0].data()[0]]));
161            adam.step(&mut params);
162        }
163
164        let after_10 = params[0].data()[0];
165
166        // Run 10 more
167        for _ in 0..10 {
168            params[0].set_grad(ndarray::arr1(&[2.0 * params[0].data()[0]]));
169            adam.step(&mut params);
170        }
171
172        let after_20 = params[0].data()[0];
173
174        // Should continue converging
175        assert!(after_20.abs() < after_10.abs());
176    }
177
178    #[test]
179    fn test_multiple_param_groups() {
180        // Test optimizer with multiple parameter tensors
181        let mut params = vec![
182            Tensor::from_vec(vec![5.0, 5.0], true),
183            Tensor::from_vec(vec![10.0, 10.0, 10.0], true),
184        ];
185
186        let mut adam = Adam::default_params(0.2);
187
188        for _ in 0..100 {
189            for p in &mut params {
190                let grad = p.data().mapv(|x| 2.0 * x);
191                p.set_grad(grad);
192            }
193            adam.step(&mut params);
194        }
195
196        // All should converge toward 0 (relaxed threshold)
197        for p in &params {
198            assert!(
199                p.data().iter().all(|&v| v.abs() < 5.0),
200                "Expected all values < 5.0, got {:?}",
201                p.data()
202            );
203        }
204    }
205}