// entrenar/optim/convergence_tests/adam_tests.rs
#[cfg(test)]
4mod tests {
5 use super::super::helpers::*;
6 use crate::optim::*;
7 use crate::Tensor;
8 use proptest::prelude::*;
9 use proptest::test_runner::Config;
10
11 proptest! {
12 #[test]
13 fn prop_adam_converges_quadratic(
14 lr in 0.05f32..0.5
15 ) {
16 let optimizer = Adam::default_params(lr);
17 prop_assert!(test_quadratic_convergence(optimizer, 100, 1.5));
18 }
19
20 #[test]
21 fn prop_adam_loss_decreases(
22 lr in 0.01f32..0.3
23 ) {
24 let optimizer = Adam::default_params(lr);
25 prop_assert!(test_loss_decreases(optimizer, 30));
26 }
27 }
28
29 proptest! {
34 #![proptest_config(Config::with_cases(1000))]
35
36 #[test]
37 fn prop_adam_ill_conditioned(
38 lr in 0.05f32..0.2,
39 beta1 in 0.85f32..0.95,
40 beta2 in 0.99f32..0.999
41 ) {
42 let optimizer = Adam::new(lr, beta1, beta2, 1e-8);
43 prop_assert!(test_ill_conditioned_convergence(optimizer, 300, 10.0));
45 }
46
47 #[test]
48 fn prop_adam_high_dim(
49 lr in 0.1f32..0.25,
50 dim in 10usize..30
51 ) {
52 let optimizer = Adam::default_params(lr);
53 prop_assert!(test_high_dim_convergence(optimizer, dim, 200, 3.0));
54 }
55
56 #[test]
57 fn prop_numerical_stability_adam(
58 lr in 0.001f32..0.5,
59 beta1 in 0.5f32..0.99,
60 beta2 in 0.9f32..0.9999
61 ) {
62 let optimizer = Adam::new(lr, beta1, beta2, 1e-8);
63 prop_assert!(test_small_gradient_stability(optimizer));
64 }
65
66 #[test]
67 fn prop_random_init_adam(
68 init in prop::collection::vec(-50.0f32..50.0, 4),
69 lr in 0.1f32..0.25
70 ) {
71 let mut params = vec![Tensor::from_vec(init.clone(), true)];
72 let mut optimizer = Adam::default_params(lr);
73 let initial_norm: f32 = init.iter().map(|x| x * x).sum();
74
75 for _ in 0..150 {
76 let grad = params[0].data().mapv(|x| 2.0 * x);
77 params[0].set_grad(grad);
78 optimizer.step(&mut params);
79 }
80
81 let final_norm: f32 = params[0].data().iter().map(|x| x * x).sum();
83 prop_assert!(final_norm < initial_norm.max(100.0));
84 }
85 }
86
/// Checks that 1000 Adam steps on the Rosenbrock function
/// `(a - x)^2 + b * (y - x^2)^2` with `a = 1`, `b = 100`, starting from
/// `(-1, 1)`, finish at a strictly lower loss than the starting loss.
#[test]
fn test_adam_rosenbrock_progress() {
    let mut optimizer = Adam::new(0.01, 0.9, 0.999, 1e-8);
    let mut params = vec![Tensor::from_vec(vec![-1.0, 1.0], true)];
    let (a, b) = (1.0f32, 100.0f32);

    // Loss evaluated at a point; shared by the before/after measurements.
    let rosenbrock = |x: f32, y: f32| (a - x).powi(2) + b * (y - x * x).powi(2);

    let initial_loss = rosenbrock(params[0].data()[0], params[0].data()[1]);

    for _ in 0..1000 {
        let (x, y) = (params[0].data()[0], params[0].data()[1]);

        // Analytic partial derivatives of the loss at (x, y).
        let residual = y - x * x;
        let dx = -2.0 * (a - x) - 4.0 * b * x * residual;
        let dy = 2.0 * b * residual;

        params[0].set_grad(ndarray::arr1(&[dx, dy]));
        optimizer.step(&mut params);
    }

    let final_loss = rosenbrock(params[0].data()[0], params[0].data()[1]);

    assert!(final_loss < initial_loss);
}
122
/// Runs two Adam optimizers that differ only in their third constructor
/// argument (0.999 vs 0.9; presumably beta2, going by the variable names) for
/// 20 steps each with gradient `2 * x`, starting from `x = 10`. Both results
/// must be finite and satisfy `|x| < 10`.
#[test]
fn test_adam_beta_params_effect() {
    let mut params_high_beta2 = vec![Tensor::from_vec(vec![10.0], true)];
    let mut params_low_beta2 = vec![Tensor::from_vec(vec![10.0], true)];

    let mut opt_high = Adam::new(0.1, 0.9, 0.999, 1e-8);
    let mut opt_low = Adam::new(0.1, 0.9, 0.9, 1e-8);

    for _ in 0..20 {
        // Read both current values before touching either tensor, then set
        // both gradients, then step both optimizers.
        let x_high = params_high_beta2[0].data()[0];
        let x_low = params_low_beta2[0].data()[0];

        params_high_beta2[0].set_grad(ndarray::arr1(&[2.0 * x_high]));
        params_low_beta2[0].set_grad(ndarray::arr1(&[2.0 * x_low]));

        opt_high.step(&mut params_high_beta2);
        opt_low.step(&mut params_low_beta2);
    }

    let final_high = params_high_beta2[0].data()[0];
    let final_low = params_low_beta2[0].data()[0];

    // Neither run may have produced NaN or infinity.
    assert!(final_high.is_finite());
    assert!(final_low.is_finite());

    // Both runs must have moved strictly inside the starting magnitude.
    assert!(final_high.abs() < 10.0);
    assert!(final_low.abs() < 10.0);
}
151
/// Drives a single parameter starting at 10.0 with gradient `2 * x` through
/// two consecutive batches of 10 steps on the same `Adam` instance, and checks
/// that the magnitude after 20 steps is strictly smaller than after 10.
#[test]
fn test_optimizer_state_persistence() {
    let mut params = vec![Tensor::from_vec(vec![10.0], true)];
    let mut adam = Adam::default_params(0.1);

    // One slot per 10-step phase; each is filled with the parameter value
    // observed at the end of that phase.
    let mut snapshots = [0.0; 2];
    for slot in snapshots.iter_mut() {
        for _ in 0..10 {
            let grad = ndarray::arr1(&[2.0 * params[0].data()[0]]);
            params[0].set_grad(grad);
            adam.step(&mut params);
        }
        *slot = params[0].data()[0];
    }
    let [after_10, after_20] = snapshots;

    assert!(after_20.abs() < after_10.abs());
}
177
/// Optimizes two tensors of different lengths (2 and 3 elements, starting at
/// 5.0 and 10.0) with one `Adam::default_params(0.2)` instance for 100 steps,
/// using the gradient `2 * x` for each tensor, and checks that every element
/// of both tensors ends with absolute value below 5.0.
#[test]
fn test_multiple_param_groups() {
    let mut params = vec![
        Tensor::from_vec(vec![5.0, 5.0], true),
        Tensor::from_vec(vec![10.0, 10.0, 10.0], true),
    ];

    let mut adam = Adam::default_params(0.2);

    for _ in 0..100 {
        // Set each tensor's gradient first, then take one step that receives
        // the whole parameter list.
        for p in &mut params {
            let grad = p.data().mapv(|x| 2.0 * x);
            p.set_grad(grad);
        }
        adam.step(&mut params);
    }

    // Read-only pass over the optimized tensors; a shared borrow is enough.
    for p in &params {
        assert!(
            p.data().iter().all(|&v| v.abs() < 5.0),
            "Expected all values < 5.0, got {:?}",
            p.data()
        );
    }
}
205}