Skip to main content

entrenar/optim/convergence_tests/
helpers.rs

//! Shared test helpers for optimizer convergence tests
//!
//! These helpers provide common functions used across optimizer tests:
//! - Quadratic convergence (convex, optimal solution at origin)
//! - Rosenbrock function (non-convex, tests valley navigation)
//! - Ill-conditioned problems (tests numerical stability)
//! - High-dimensional problems (tests scalability)
//! - Numerical edge cases (very small/large gradients)
9
10#[cfg(test)]
11use crate::optim::Optimizer;
12#[cfg(test)]
13use crate::Tensor;
14
/// Minimizes f(x) = x^2 coordinate-wise and reports whether the optimizer
/// reached the origin.
///
/// A single four-element parameter starts at `[3.0, -2.0, 1.5, -2.5]`. Before
/// each of the `iterations` steps the analytic gradient `2x` is installed on
/// the parameter and `optimizer.step` is invoked.
///
/// Returns `true` only if every coordinate ends with absolute value strictly
/// below `threshold`.
#[cfg(test)]
pub fn test_quadratic_convergence<O: Optimizer>(
    mut optimizer: O,
    iterations: usize,
    threshold: f32,
) -> bool {
    let start: Vec<f32> = vec![3.0, -2.0, 1.5, -2.5];
    let mut params = vec![Tensor::from_vec(start, true)];

    let mut remaining = iterations;
    while remaining > 0 {
        // d/dx (x^2) = 2x, evaluated at the current parameter values.
        let two_x = params[0].data().mapv(|v| v * 2.0);
        params[0].set_grad(two_x);
        optimizer.step(&mut params);

        remaining -= 1;
    }

    // `<` is false for NaN, so a diverged coordinate counts as "not converged".
    let near_zero = |val: &f32| val.abs() < threshold;
    params[0].data().iter().all(near_zero)
}
35
/// Test that the optimizer never increases the loss of f(x) = x^2 noticeably.
///
/// Starting from x = 10, the loss is sampled before every optimizer step and
/// once more after the final step, so that each of the `iterations` steps is
/// actually verified.
///
/// Returns `false` as soon as a sampled loss is non-finite (NaN or infinite)
/// or exceeds the previous sample by more than `1e-3`; returns `true`
/// otherwise (including when `iterations` is 0).
#[cfg(test)]
pub fn test_loss_decreases<O: Optimizer>(mut optimizer: O, iterations: usize) -> bool {
    // Slack allowed between consecutive losses (covers "stayed the same
    // because it already converged" plus rounding noise).
    const TOLERANCE: f32 = 1e-3;

    let mut params = vec![Tensor::from_vec(vec![10.0], true)];
    let mut prev_loss = f32::INFINITY;

    for _ in 0..iterations {
        // Compute loss and gradient for f(x) = x^2
        let x = params[0].data()[0];
        let loss = x * x;
        let grad = ndarray::arr1(&[2.0 * x]);

        // NaN compares false against everything, so `loss > prev_loss + tol`
        // alone would let a diverged optimizer pass; reject it explicitly.
        if !loss.is_finite() || loss > prev_loss + TOLERANCE {
            return false; // Loss increased significantly or blew up
        }

        prev_loss = loss;
        params[0].set_grad(grad);
        optimizer.step(&mut params);
    }

    // The loop samples the loss *before* each step, so the effect of the last
    // step has not been inspected yet; check it here.
    let x = params[0].data()[0];
    let final_loss = x * x;
    final_loss.is_finite() && final_loss <= prev_loss + TOLERANCE
}
60
/// Drives the optimizer on the non-convex Rosenbrock function
/// f(x, y) = (a - x)^2 + b * (y - x^2)^2 with a = 1 and b = 100.
///
/// The single two-element parameter starts at `[0, 0]`; the minimum lies at
/// `(a, a^2) = (1, 1)`. The analytic gradient is installed before each of the
/// `iterations` steps.
///
/// Returns `true` when both coordinates end strictly within `threshold` of
/// the minimum.
#[cfg(test)]
// NOTE(review): presumably kept because not every optimizer suite calls this
// helper — confirm before removing the allow.
#[allow(dead_code)]
pub fn test_rosenbrock_convergence<O: Optimizer>(
    mut optimizer: O,
    iterations: usize,
    threshold: f32,
) -> bool {
    let a = 1.0f32;
    let b = 100.0f32;
    let mut params = vec![Tensor::from_vec(vec![0.0, 0.0], true)];

    for _step in 0..iterations {
        let (x, y) = {
            let point = params[0].data();
            (point[0], point[1])
        };

        // Both partial derivatives share the factor (y - x^2):
        //   df/dx = -2(a - x) - 4bx(y - x^2)
        //   df/dy =  2b(y - x^2)
        let residual = y - x * x;
        let df_dx = -2.0 * (a - x) - 4.0 * b * x * residual;
        let df_dy = 2.0 * b * residual;

        params[0].set_grad(ndarray::arr1(&[df_dx, df_dy]));
        optimizer.step(&mut params);
    }

    // Distance of each coordinate from the known minimum (a, a^2).
    let x_err = (params[0].data()[0] - a).abs();
    let y_err = (params[0].data()[1] - a * a).abs();
    x_err < threshold && y_err < threshold
}
95
/// Exercises the optimizer on a badly scaled quadratic bowl,
/// f(x, y) = 0.5 * (x^2 + 100 * y^2), i.e. 0.5 * v^T A v with A = diag(1, 100).
///
/// The single two-element parameter starts at `[10, 10]` and the analytic
/// gradient `[x, 100y]` is installed before each of the `iterations` steps.
///
/// Returns `true` when both coordinates end with absolute value strictly
/// below `threshold` (the minimum is the origin).
#[cfg(test)]
pub fn test_ill_conditioned_convergence<O: Optimizer>(
    mut optimizer: O,
    iterations: usize,
    threshold: f32,
) -> bool {
    let mut params = vec![Tensor::from_vec(vec![10.0, 10.0], true)];

    let mut steps_left = iterations;
    while steps_left > 0 {
        let (x, y) = {
            let current = params[0].data();
            (current[0], current[1])
        };

        // Gradient of the bowl: the y direction is 100x steeper than x.
        let slope = ndarray::arr1(&[x, y * 100.0]);
        params[0].set_grad(slope);
        optimizer.step(&mut params);

        steps_left -= 1;
    }

    let reached_origin = params[0].data().iter().all(|coord| coord.abs() < threshold);
    reached_origin
}
120
/// Runs the optimizer on f(x) = sum(x_i^2) in `dim` dimensions.
///
/// Coordinate `i` (zero-based) starts at `(i + 1) * 0.5`, so the start point
/// is `[0.5, 1.0, 1.5, ...]`. The analytic gradient `2x` is installed before
/// each of the `iterations` steps.
///
/// Returns `true` when every coordinate ends with absolute value strictly
/// below `threshold`.
#[cfg(test)]
pub fn test_high_dim_convergence<O: Optimizer>(
    mut optimizer: O,
    dim: usize,
    iterations: usize,
    threshold: f32,
) -> bool {
    let start = (0..dim)
        .map(|idx| 0.5 * (1.0 + idx as f32))
        .collect::<Vec<f32>>();
    let mut params = vec![Tensor::from_vec(start, true)];

    for _step in 0..iterations {
        // Each coordinate contributes x_i^2, whose derivative is 2 * x_i.
        let doubled = params[0].data().mapv(|coord| coord * 2.0);
        params[0].set_grad(doubled);
        optimizer.step(&mut params);
    }

    let small_enough = |coord: &f32| coord.abs() < threshold;
    params[0].data().iter().all(small_enough)
}
141
/// Checks that tiny gradients do not make the optimizer produce NaN or Inf.
///
/// Two coordinates start at `1e-6`; for 100 steps the gradient `2x` of
/// f(x) = x^2 is installed and `optimizer.step` is invoked.
///
/// Returns `true` when every resulting coordinate is finite.
#[cfg(test)]
pub fn test_small_gradient_stability<O: Optimizer>(mut optimizer: O) -> bool {
    const STEPS: usize = 100;

    let tiny: Vec<f32> = vec![1e-6, 1e-6];
    let mut params = vec![Tensor::from_vec(tiny, true)];

    for _step in 0..STEPS {
        let two_x = params[0].data().mapv(|v| v * 2.0);
        params[0].set_grad(two_x);
        optimizer.step(&mut params);
    }

    // A value is "bad" exactly when it is NaN or infinite.
    let any_bad = params[0]
        .data()
        .iter()
        .any(|v| v.is_nan() || v.is_infinite());
    !any_bad
}
156
/// Checks that large gradients do not make the optimizer produce NaN or Inf.
///
/// Two coordinates start at `1e4`, so the first gradient `2x` of f(x) = x^2
/// is `2e4` per coordinate. The gradient is installed and `optimizer.step`
/// is invoked 100 times.
///
/// Returns `true` when every resulting coordinate is finite.
#[cfg(test)]
pub fn test_large_gradient_stability<O: Optimizer>(mut optimizer: O) -> bool {
    let huge: Vec<f32> = vec![1e4, 1e4];
    let mut params = vec![Tensor::from_vec(huge, true)];

    let mut steps_left: usize = 100;
    while steps_left > 0 {
        let two_x = params[0].data().mapv(|v| v * 2.0);
        params[0].set_grad(two_x);
        optimizer.step(&mut params);

        steps_left -= 1;
    }

    let all_finite = params[0].data().iter().all(|v| v.is_finite());
    all_finite
}