Skip to main content

entrenar/optim/convergence_tests/
sgd_tests.rs

//! SGD optimizer convergence tests
2
#[cfg(test)]
mod tests {
    use super::super::helpers::*;
    use crate::optim::*;
    use crate::Tensor;
    use proptest::prelude::*;
    use proptest::test_runner::Config;

    // ------------------------------------------------------------------------
    // Baseline property tests (no explicit proptest configuration)
    // ------------------------------------------------------------------------

    proptest! {
        // Delegates to the shared `test_quadratic_convergence` helper for any
        // learning rate in [0.01, 0.5) and momentum in [0.0, 0.9).
        // NOTE(review): 100 and 1.0 are presumably the step count and the
        // tolerance expected by the helper - confirm against `helpers`.
        #[test]
        fn prop_sgd_converges_quadratic(
            lr in 0.01f32..0.5,
            momentum in 0.0f32..0.9
        ) {
            let sgd = SGD::new(lr, momentum);
            prop_assert!(test_quadratic_convergence(sgd, 100, 1.0));
        }

        // Momentum-free SGD is handed to the shared `test_loss_decreases` helper.
        // NOTE(review): 50 is presumably the number of steps - confirm against `helpers`.
        #[test]
        fn prop_sgd_loss_decreases(
            lr in 0.01f32..0.3
        ) {
            let sgd = SGD::new(lr, 0.0);
            prop_assert!(test_loss_decreases(sgd, 50));
        }
    }

    // Runs two optimizers (momentum 0.9 vs. 0.0, both with lr 0.1) from the same
    // start value 10.0 for 20 steps and expects the momentum run to end closer
    // to zero.
    #[test]
    fn test_sgd_with_momentum_faster_than_no_momentum() {
        let mut momentum_params = vec![Tensor::from_vec(vec![10.0], true)];
        let mut plain_params = vec![Tensor::from_vec(vec![10.0], true)];

        let mut momentum_opt = SGD::new(0.1, 0.9);
        let mut plain_opt = SGD::new(0.1, 0.0);

        for _ in 0..20 {
            // One gradient, 2 * x, is evaluated at the momentum run's current
            // value and deliberately handed to BOTH runs.
            let momentum_value = momentum_params[0].data()[0];
            let shared_grad = ndarray::arr1(&[2.0 * momentum_value]);
            plain_params[0].set_grad(shared_grad.clone());
            momentum_params[0].set_grad(shared_grad);

            momentum_opt.step(&mut momentum_params);
            plain_opt.step(&mut plain_params);
        }

        // The momentum run must finish strictly closer to 0 than the plain run.
        let momentum_distance = momentum_params[0].data()[0].abs();
        let plain_distance = plain_params[0].data()[0].abs();
        assert!(momentum_distance < plain_distance);
    }

    // ------------------------------------------------------------------------
    // Extended property tests: 1000 cases per property
    // ------------------------------------------------------------------------

    proptest! {
        #![proptest_config(Config::with_cases(1000))]

        // Rosenbrock-style gradients from the origin for 500 steps. Only checks
        // that every coordinate is still finite; no convergence is required.
        #[test]
        fn prop_sgd_rosenbrock(
            lr in 0.0001f32..0.001,
            momentum in 0.8f32..0.99
        ) {
            let mut params = vec![Tensor::from_vec(vec![0.0, 0.0], true)];
            let mut sgd = SGD::new(lr, momentum);

            for _ in 0..500 {
                let (x, y) = (params[0].data()[0], params[0].data()[1]);
                // Both partial derivatives share the residual y - x^2.
                let residual = y - x * x;
                let grad = ndarray::arr1(&[
                    -2.0 * (1.0 - x) - 400.0 * x * residual,
                    200.0 * residual,
                ]);
                params[0].set_grad(grad);
                sgd.step(&mut params);
            }

            prop_assert!(params[0].data().iter().all(|coord| coord.is_finite()));
        }

        // Delegates to the shared `test_high_dim_convergence` helper with
        // momentum fixed at 0.9 and a dimension drawn from [10, 30).
        // NOTE(review): 300 and 2.0 are presumably the step count and the
        // tolerance expected by the helper - confirm against `helpers`.
        #[test]
        fn prop_sgd_high_dim(
            lr in 0.05f32..0.15,
            dim in 10usize..30
        ) {
            let sgd = SGD::new(lr, 0.9);
            prop_assert!(test_high_dim_convergence(sgd, dim, 300, 2.0));
        }

        // Starts from a random 4-element point with coordinates in [-50, 50),
        // applies 150 steps with gradient 2 * x, and checks the sum of squares
        // of the result against the larger of the starting sum of squares and 100.
        #[test]
        fn prop_random_init_sgd(
            init in prop::collection::vec(-50.0f32..50.0, 4),
            lr in 0.05f32..0.2
        ) {
            // Measure the starting point before it is moved into the tensor.
            let initial_sq_norm: f32 = init.iter().map(|v| v * v).sum();
            let mut params = vec![Tensor::from_vec(init, true)];
            let mut sgd = SGD::new(lr, 0.9);

            for _ in 0..150 {
                let grad = params[0].data().mapv(|coord| 2.0 * coord);
                params[0].set_grad(grad);
                sgd.step(&mut params);
            }

            // The floor of 100 keeps the bound meaningful for starts near zero.
            let final_sq_norm: f32 = params[0].data().iter().map(|v| v * v).sum();
            let bound = initial_sq_norm.max(100.0);
            prop_assert!(final_sq_norm < bound);
        }
    }

    // ------------------------------------------------------------------------
    // Deterministic tests
    // ------------------------------------------------------------------------

    // Checks that the parameter still moves on a step whose gradient is zero,
    // after ten warm-up steps with gradient 2 * x (lr 0.01, momentum 0.9).
    #[test]
    fn test_sgd_momentum_behavior() {
        let mut sgd = SGD::new(0.01, 0.9);
        let mut params = vec![Tensor::from_vec(vec![10.0], true)];

        // Warm-up phase: ten ordinary steps starting from 10.0.
        for _ in 0..10 {
            let current = params[0].data()[0];
            params[0].set_grad(ndarray::arr1(&[2.0 * current]));
            sgd.step(&mut params);
        }
        let after_warmup = params[0].data()[0];

        // Coasting phase: a single step with an all-zero gradient.
        params[0].set_grad(ndarray::arr1(&[0.0]));
        sgd.step(&mut params);
        let after_coast = params[0].data()[0];

        // Any displacement on the zero-gradient step is attributed to momentum.
        let displacement = (after_coast - after_warmup).abs();
        assert!(
            displacement > 1e-6,
            "Momentum should cause movement even with zero gradient"
        );

        // Sanity: the warm-up stayed inside the starting magnitude and the
        // coasting step did not produce NaN or infinity.
        assert!(after_warmup.abs() < 10.0);
        assert!(after_coast.is_finite());
    }
}