Skip to main content

entrenar/optim/convergence_tests/
integration_tests.rs

//! Integration tests that compare optimizers and exercise them together with
//! gradient clipping and learning-rate scheduling.
2
#[cfg(test)]
mod tests {
    use crate::optim::*;
    use crate::Tensor;
    use approx::assert_abs_diff_eq;

    /// Drives Adam and SGD side by side for 30 steps and checks that the Adam
    /// parameters finish with the smaller Euclidean norm.
    ///
    /// NOTE(review): the gradient `2 * x` is evaluated on the Adam parameters
    /// only and that same array is handed to both optimizers, so SGD is not
    /// descending the gradient of its own parameters — confirm this is the
    /// intended comparison.
    #[test]
    fn test_adam_faster_than_sgd() {
        // Both optimizers start from an identical parameter vector.
        let make_params = || vec![Tensor::from_vec(vec![10.0, -10.0], true)];
        let mut params_adam = make_params();
        let mut params_sgd = make_params();

        let mut sgd = SGD::new(0.1, 0.0);
        let mut adam = Adam::default_params(0.1);

        for _step in 0..30 {
            // One gradient array, derived from the Adam parameters, feeds both.
            let shared_grad = params_adam[0].data().mapv(|x| 2.0 * x);
            params_sgd[0].set_grad(shared_grad.clone());
            params_adam[0].set_grad(shared_grad);

            adam.step(&mut params_adam);
            sgd.step(&mut params_sgd);
        }

        // Euclidean norm of a tensor's current values.
        let l2_norm = |tensor: &Tensor| -> f32 {
            tensor.data().iter().map(|&x| x * x).sum::<f32>().sqrt()
        };
        let adam_norm = l2_norm(&params_adam[0]);
        let sgd_norm = l2_norm(&params_sgd[0]);

        assert!(adam_norm < sgd_norm);
    }

    /// After a single Adam step with an all-zero gradient, every parameter
    /// must remain within 0.1 of its starting value.
    ///
    /// NOTE(review): the optimizer is freshly constructed, so presumably no
    /// momentum has accumulated and the update is exactly zero; if so the
    /// tolerance could be tightened — confirm against the Adam implementation.
    #[test]
    fn test_optimizer_with_zero_gradients() {
        let mut params = vec![Tensor::from_vec(vec![1.0, 2.0], true)];
        params[0].set_grad(ndarray::arr1(&[0.0, 0.0]));

        // Snapshot the values before the optimizer touches them.
        let before = params[0].data().to_owned();

        let mut adam = Adam::default_params(0.1);
        adam.step(&mut params);

        for (after, expected) in params[0].data().iter().zip(before.iter()) {
            assert_abs_diff_eq!(*after, *expected, epsilon = 0.1);
        }
    }

    /// Clips a single large gradient to unit norm, then takes one Adam step
    /// and checks the parameter moved down from 1.0 but stayed above 0.5.
    #[test]
    fn test_gradient_clipping_integration() {
        use crate::optim::clip_grad_norm;

        let mut params = vec![Tensor::from_vec(vec![1.0], true)];
        params[0].set_grad(ndarray::arr1(&[100.0]));

        // The returned value is asserted to equal the gradient's norm before
        // clipping (100.0), not the clipped norm.
        let global_norm = clip_grad_norm(&mut params, 1.0);
        assert_abs_diff_eq!(global_norm, 100.0, epsilon = 1e-6);

        let clipped = params[0].grad().expect("gradient should be available")[0];
        assert_abs_diff_eq!(clipped, 1.0, epsilon = 1e-6);

        // Apply one optimizer step using the clipped gradient.
        let mut adam = Adam::default_params(0.1);
        adam.step(&mut params);

        let updated = params[0].data()[0];
        assert!(updated < 1.0);
        assert!(updated > 0.5);
    }

    /// Minimises `x * x` with SGD for 10 steps while a cosine-annealing
    /// scheduler sets the learning rate, checking that the recorded loss
    /// strictly decreases and that the last recorded loss is below 1.0.
    #[test]
    fn test_learning_rate_scheduler_integration() {
        use crate::optim::{CosineAnnealingLR, LRScheduler};

        let mut params = vec![Tensor::from_vec(vec![5.0], true)];
        let mut optimizer = SGD::new(0.3, 0.0);
        let mut scheduler = CosineAnnealingLR::default_min(0.3, 10);

        let mut losses = Vec::with_capacity(10);

        for _ in 0..10 {
            // Record the loss at the current point, then set its gradient 2x.
            let x = params[0].data()[0];
            losses.push(x * x);
            params[0].set_grad(ndarray::arr1(&[2.0 * x]));

            // Push the scheduler's current rate into the optimizer, step the
            // optimizer, then advance the schedule.
            scheduler.apply(&mut optimizer);
            optimizer.step(&mut params);
            scheduler.step();
        }

        // Each recorded loss must be strictly below its predecessor.
        for pair in losses.windows(2) {
            assert!(pair[1] < pair[0]);
        }

        // The loss recorded on the final iteration must be small.
        assert!(losses.last().is_some_and(|&loss| loss < 1.0));
    }
}