entrenar/optim/convergence_tests/
sgd_tests.rs1#[cfg(test)]
4mod tests {
5 use super::super::helpers::*;
6 use crate::optim::*;
7 use crate::Tensor;
8 use proptest::prelude::*;
9 use proptest::test_runner::Config;
10
11 proptest! {
12 #[test]
13 fn prop_sgd_converges_quadratic(
14 lr in 0.01f32..0.5,
15 momentum in 0.0f32..0.9
16 ) {
17 let optimizer = SGD::new(lr, momentum);
18 prop_assert!(test_quadratic_convergence(optimizer, 100, 1.0));
19 }
20
21 #[test]
22 fn prop_sgd_loss_decreases(
23 lr in 0.01f32..0.3
24 ) {
25 let optimizer = SGD::new(lr, 0.0);
26 prop_assert!(test_loss_decreases(optimizer, 50));
27 }
28 }
29
/// Steps two SGD optimizers (learning rate 0.1; momentum 0.9 versus 0.0) for
/// 20 iterations from the same starting value `10.0`, then asserts that the
/// momentum-driven parameter ends with the smaller absolute value.
///
/// NOTE(review): the gradient is evaluated once per iteration from the
/// momentum run's parameter and that same array is assigned to both parameter
/// sets, so the momentum-free run is not driven by its own gradient — confirm
/// this is intentional before relying on the comparison.
#[test]
fn test_sgd_with_momentum_faster_than_no_momentum() {
    let mut with_momentum = vec![Tensor::from_vec(vec![10.0], true)];
    let mut plain = vec![Tensor::from_vec(vec![10.0], true)];

    let mut momentum_sgd = SGD::new(0.1, 0.9);
    let mut plain_sgd = SGD::new(0.1, 0.0);

    for _ in 0..20 {
        // Gradient 2 * x, evaluated at the momentum run's current value.
        let current = with_momentum[0].data()[0];
        let shared_grad = ndarray::arr1(&[2.0 * current]);

        with_momentum[0].set_grad(shared_grad.clone());
        plain[0].set_grad(shared_grad);

        momentum_sgd.step(&mut with_momentum);
        plain_sgd.step(&mut plain);
    }

    let distance_with = with_momentum[0].data()[0].abs();
    let distance_plain = plain[0].data()[0].abs();
    assert!(distance_with < distance_plain);
}
51
52 proptest! {
57 #![proptest_config(Config::with_cases(1000))]
58
59 #[test]
60 fn prop_sgd_rosenbrock(
61 lr in 0.0001f32..0.001,
62 momentum in 0.8f32..0.99
63 ) {
64 let mut optimizer = SGD::new(lr, momentum);
65 let mut params = vec![Tensor::from_vec(vec![0.0, 0.0], true)];
67 for _ in 0..500 {
68 let x = params[0].data()[0];
69 let y = params[0].data()[1];
70 let dx = -2.0 * (1.0 - x) - 400.0 * x * (y - x * x);
71 let dy = 200.0 * (y - x * x);
72 params[0].set_grad(ndarray::arr1(&[dx, dy]));
73 optimizer.step(&mut params);
74 }
75 prop_assert!(params[0].data().iter().all(|&v| v.is_finite()));
76 }
77
78 #[test]
79 fn prop_sgd_high_dim(
80 lr in 0.05f32..0.15,
81 dim in 10usize..30
82 ) {
83 let optimizer = SGD::new(lr, 0.9);
84 prop_assert!(test_high_dim_convergence(optimizer, dim, 300, 2.0));
85 }
86
87 #[test]
88 fn prop_random_init_sgd(
89 init in prop::collection::vec(-50.0f32..50.0, 4),
90 lr in 0.05f32..0.2
91 ) {
92 let mut params = vec![Tensor::from_vec(init.clone(), true)];
93 let mut optimizer = SGD::new(lr, 0.9);
94 let initial_norm: f32 = init.iter().map(|x| x * x).sum();
95
96 for _ in 0..150 {
97 let grad = params[0].data().mapv(|x| 2.0 * x);
98 params[0].set_grad(grad);
99 optimizer.step(&mut params);
100 }
101
102 let final_norm: f32 = params[0].data().iter().map(|x| x * x).sum();
104 prop_assert!(final_norm < initial_norm.max(100.0));
105 }
106 }
107
/// Checks that SGD with momentum keeps moving after the gradient vanishes.
///
/// The parameter starts at `10.0` and takes 10 steps (learning rate 0.01,
/// momentum 0.9) with gradient `2 * x`. One further step is then taken with
/// a zero gradient. The test asserts that this last step still changed the
/// parameter by more than `1e-6`, that the value after the first 10 steps is
/// below 10 in magnitude, and that the final value is finite.
#[test]
fn test_sgd_momentum_behavior() {
    let mut params = vec![Tensor::from_vec(vec![10.0], true)];
    let mut sgd = SGD::new(0.01, 0.9);

    for _ in 0..10 {
        let x = params[0].data()[0];
        params[0].set_grad(ndarray::arr1(&[2.0 * x]));
        sgd.step(&mut params);
    }
    let after_warmup = params[0].data()[0];

    // Zero gradient: any movement in this step comes from optimizer state alone.
    params[0].set_grad(ndarray::arr1(&[0.0]));
    sgd.step(&mut params);
    let after_coast = params[0].data()[0];

    let displacement = (after_coast - after_warmup).abs();
    assert!(
        displacement > 1e-6,
        "Momentum should cause movement even with zero gradient"
    );

    assert!(after_warmup.abs() < 10.0);
    assert!(after_coast.is_finite());
}
141}