scirs2_optimize/unconstrained/newton.rs

//! Newton methods for unconstrained optimization

use crate::error::OptimizeError;
use crate::unconstrained::conjugate_gradient::compute_line_bounds;
use crate::unconstrained::result::OptimizeResult;
use crate::unconstrained::utils::{
    array_diff_norm, finite_difference_gradient, finite_difference_hessian,
};
use crate::unconstrained::{Bounds, Options};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1};

/// Implements the Newton-Conjugate-Gradient algorithm for unconstrained optimization
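///
/// # Example
///
/// A minimal sketch of how the solver is called (marked `ignore` because the
/// public re-export paths for this function are not visible from this file):
///
/// ```ignore
/// use scirs2_core::ndarray::{array, ArrayView1};
///
/// let quadratic = |x: &ArrayView1<f64>| -> f64 { x[0] * x[0] + x[1] * x[1] };
/// let result = minimize_newton_cg(quadratic, array![1.0, 1.0], &Options::default()).unwrap();
/// assert!(result.success);
/// ```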
#[allow(dead_code)]
pub fn minimize_newton_cg<F, S>(
    mut fun: F,
    x0: Array1<f64>,
    options: &Options,
) -> Result<OptimizeResult<S>, OptimizeError>
where
    F: FnMut(&ArrayView1<f64>) -> S + Clone,
    S: Into<f64> + Clone,
{
    // Get options or use defaults
    let ftol = options.ftol;
    let gtol = options.gtol;
    let max_iter = options.max_iter;
    let eps = options.eps;
    let bounds = options.bounds.as_ref();

    // Initialize variables
    let n = x0.len();
    let mut x = x0.to_owned();

    // Ensure initial point is within bounds
    if let Some(bounds) = bounds {
        bounds.project(x.as_slice_mut().unwrap());
    }

    // Function evaluation counter
    let mut nfev = 0;

    // Initialize function value
    let mut f = fun(&x.view()).into();
    nfev += 1;

    // Initialize gradient
    let mut g = finite_difference_gradient(&mut fun, &x.view(), eps)?;
    nfev += n;

    // Iteration counter
    let mut iter = 0;

    // Main optimization loop
    while iter < max_iter {
        // Check convergence on gradient
        let g_norm = g.dot(&g).sqrt();
        if g_norm < gtol {
            break;
        }

        // Calculate the Hessian approximation using finite differences
        let hess = finite_difference_hessian(&mut fun, &x.view(), eps)?;
        nfev += n * n;

        // Solve the Newton-CG system to find the step direction
        let mut p = solve_newton_cg_system(&g, &hess, gtol);

        // If bounds are provided, project the search direction
        if let Some(bounds) = bounds {
            project_direction(&mut p, &x, Some(bounds));

            // If the projected direction is zero or too small, use the projected gradient
            let dir_norm = p.dot(&p).sqrt();
            if dir_norm < 1e-10 {
                // Try using the projected gradient instead
                p = -g.clone();
                project_direction(&mut p, &x, Some(bounds));

                // If even the projected gradient is zero, we're at a constrained optimum
                let pg_norm = p.dot(&p).sqrt();
                if pg_norm < 1e-10 {
                    break;
                }
            }
        }

        // Line search along the direction to determine the step size
        let (alpha, f_new) = line_search_newton(&mut fun, &x, &p, f, &mut nfev, bounds);

        // Take the step
        let mut x_new = &x + &(&p * alpha);

        // Ensure we're within bounds
        if let Some(bounds) = bounds {
            bounds.project(x_new.as_slice_mut().unwrap());
        }

        // Check if the step actually moved the point
        let step_size = array_diff_norm(&x_new.view(), &x.view());
        if step_size < 1e-10 {
            // We're at a boundary constraint and can't move further
            x = x_new;
            break;
        }

        // Calculate the new gradient
        let g_new = finite_difference_gradient(&mut fun, &x_new.view(), eps)?;
        nfev += n;

        // Check convergence on function value
        if (f - f_new).abs() < ftol * (1.0 + f.abs()) {
            x = x_new;
            g = g_new;
            break;
        }

        // Update variables for next iteration
        x = x_new;
        f = f_new;
        g = g_new;

        iter += 1;
    }

    // Evaluate the function once more at the final point for the result
    let final_fun = fun(&x.view());
    nfev += 1;

    // Create and return result
    Ok(OptimizeResult {
        x,
        fun: final_fun,
        nit: iter,
        func_evals: nfev,
        nfev,
        success: iter < max_iter,
        message: if iter < max_iter {
            "Optimization terminated successfully.".to_string()
        } else {
            "Maximum iterations reached.".to_string()
        },
        jacobian: Some(g),
        hessian: None,
    })
}

/// Solve the Newton-CG system Hx = -g using the conjugate gradient method
#[allow(dead_code)]
fn solve_newton_cg_system(g: &Array1<f64>, hess: &Array2<f64>, tol: f64) -> Array1<f64> {
    let n = g.len();

    // Start with x = 0
    let mut x = Array1::zeros(n);

    // If the gradient is (numerically) zero, return a zero step
    if g.dot(g) < 1e-10 {
        return x;
    }

    // Initialize residual r = -g - Hx = -g (since x = 0)
    let mut r = -g.clone();

    // Initialize search direction p = r
    let mut p = r.clone();

    // Initial residual norm
    let r0_norm = r.dot(&r).sqrt();
    // CG convergence tolerance: the initial residual norm scaled by tol, capped at 0.1
    let cg_tol = f64::min(0.1, r0_norm * tol);

    // Maximum number of CG iterations
    let max_cg_iters = 2 * n;

    // Conjugate gradient iterations
    for j in 0..max_cg_iters {
        // Compute H*p
        let hp = hess.dot(&p);

        // Compute the curvature p'*H*p
        let php = p.dot(&hp);

        // If the curvature is non-positive (or numerically tiny), stop the CG
        // iterations: fall back to the steepest descent direction on the first
        // iteration, otherwise return the progress made so far
        if php <= 1e-10 {
            if j == 0 {
                return -g.clone();
            }
            return x;
        }

        // Squared norm of the current residual (reused for alpha and beta)
        let rs_old = r.dot(&r);

        // Compute the CG step size
        let alpha = rs_old / php;

        // Update the solution
        x = &x + &(&p * alpha);

        // Update the residual: r_{k+1} = r_k - alpha * H * p_k
        r = &r - &(&hp * alpha);

        // Check convergence
        let rs_new = r.dot(&r);
        if rs_new.sqrt() < cg_tol {
            break;
        }

        // Fletcher-Reeves beta: ratio of new to old squared residual norms
        let beta = rs_new / rs_old;

        // Update the search direction
        p = &r + &(&p * beta);
    }

    x
}

/// Line search for Newton method with bounds support
#[allow(dead_code)]
fn line_search_newton<F, S>(
    fun: &mut F,
    x: &Array1<f64>,
    direction: &Array1<f64>,
    f_x: f64,
    nfev: &mut usize,
    bounds: Option<&Bounds>,
) -> (f64, f64)
where
    F: FnMut(&ArrayView1<f64>) -> S,
    S: Into<f64>,
{
    // Get bounds on the line search parameter
    let (a_min, a_max) = if let Some(b) = bounds {
        compute_line_bounds(x, direction, Some(b))
    } else {
        (f64::NEG_INFINITY, f64::INFINITY)
    };

    // Use a simple backtracking line search with bounds
    let c1 = 1e-4; // Sufficient decrease parameter
    let rho = 0.5; // Backtracking parameter

    // Start with the full Newton step (alpha = 1), shrunk to stay strictly inside a_max
    let mut alpha = if a_max < 1.0 { a_max * 0.99 } else { 1.0 };

    // If bounds fully constrain movement, return that constrained step
    if a_max <= 0.0 || a_min >= a_max {
        alpha = if a_max > 0.0 { a_max } else { 0.0 };
        let x_new = x + alpha * direction;
        *nfev += 1;
        let f_new = fun(&x_new.view()).into();
        return (alpha, f_new);
    }

    // Function to evaluate a point on the line
    let mut f_line = |alpha: f64| {
        let mut x_new = x + alpha * direction;

        // Project onto bounds (if needed)
        if let Some(bounds) = bounds {
            bounds.project(x_new.as_slice_mut().unwrap());
        }

        *nfev += 1;
        fun(&x_new.view()).into()
    };

    // Initial step
    let mut f_new = f_line(alpha);

    // Backtrack until the sufficient-decrease condition holds or we reach the
    // lower bound. The gradient is not available here, so the squared direction
    // norm stands in for the magnitude of the directional derivative.
    let slope = direction.dot(direction);
    while f_new > f_x - c1 * alpha * slope && alpha > a_min {
        alpha *= rho;

        // Ensure alpha is at least a_min
        if alpha < a_min {
            alpha = a_min;
        }

        f_new = f_line(alpha);

        // Prevent infinite loops for very small steps
        if alpha < 1e-10 {
            break;
        }
    }

    (alpha, f_new)
}

/// Projects the search direction to ensure we don't move in a direction that
/// immediately violates the bounds.
#[allow(dead_code)]
fn project_direction(direction: &mut Array1<f64>, x: &Array1<f64>, bounds: Option<&Bounds>) {
    if bounds.is_none() {
        return;
    }

    let bounds = bounds.unwrap();

    for i in 0..x.len() {
        let xi = x[i];

        // Check if we're at a bound
        if let Some(lb) = bounds.lower[i] {
            if (xi - lb).abs() < 1e-10 && direction[i] < 0.0 {
                // At the lower bound and moving in the negative direction
                direction[i] = 0.0;
            }
        }

        if let Some(ub) = bounds.upper[i] {
            if (xi - ub).abs() < 1e-10 && direction[i] > 0.0 {
                // At the upper bound and moving in the positive direction
                direction[i] = 0.0;
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    #[test]
    fn test_newton_cg_quadratic() {
        let quadratic = |x: &ArrayView1<f64>| -> f64 { x[0] * x[0] + 4.0 * x[1] * x[1] };

        let x0 = Array1::from_vec(vec![2.0, 1.0]);
        let options = Options::default();

        let result = minimize_newton_cg(quadratic, x0, &options).unwrap();

        assert!(result.success);
        assert_abs_diff_eq!(result.x[0], 0.0, epsilon = 1e-4);
        assert_abs_diff_eq!(result.x[1], 0.0, epsilon = 1e-4);
    }
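
    // A direct check of the inner CG solve on a small SPD system, verified by
    // hand: for H = diag(2, 8) and g = (2, 8), the exact Newton step solving
    // H p = -g is p = (-1, -1). Assumes the `array!` macro is re-exported from
    // scirs2_core::ndarray alongside the array types used above.
    #[test]
    fn test_solve_newton_cg_system_spd() {
        use scirs2_core::ndarray::array;

        // Hessian and gradient of f(x, y) = x^2 + 4 y^2 at (1, 1)
        let hess = array![[2.0, 0.0], [0.0, 8.0]];
        let g = array![2.0, 8.0];

        let p = solve_newton_cg_system(&g, &hess, 1e-5);

        // CG on a 2x2 SPD system converges to the exact solution
        assert_abs_diff_eq!(p[0], -1.0, epsilon = 1e-6);
        assert_abs_diff_eq!(p[1], -1.0, epsilon = 1e-6);
    }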

    #[test]
    fn test_newton_cg_with_bounds() {
        let quadratic =
            |x: &ArrayView1<f64>| -> f64 { (x[0] - 2.0).powi(2) + (x[1] - 3.0).powi(2) };

        let x0 = Array1::from_vec(vec![0.0, 0.0]);
        let mut options = Options::default();
        options.max_iter = 100; // More iterations for bounded optimization

        // Constrain solution to [0, 1] x [0, 1]
        let bounds = Bounds::new(&[(Some(0.0), Some(1.0)), (Some(0.0), Some(1.0))]);
        options.bounds = Some(bounds);

        let result = minimize_newton_cg(quadratic, x0, &options).unwrap();

        assert!(result.success);
        // The optimal point (2, 3) is outside the bounds, so we should get (1, 1)
        // Allow more tolerance for bounded optimization
        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 0.4);
        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 0.4);
    }
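
    // A hand-checked sketch of the backtracking line search: from x = 2 on
    // f(x) = x^2, the exact Newton step p = -2 satisfies the sufficient-decrease
    // test immediately, so the full step alpha = 1 should be accepted after a
    // single function evaluation.
    #[test]
    fn test_line_search_newton_full_step() {
        let mut quadratic = |x: &ArrayView1<f64>| -> f64 { x[0] * x[0] };

        let x = Array1::from_vec(vec![2.0]);
        let direction = Array1::from_vec(vec![-2.0]);
        let f_x = 4.0;
        let mut nfev = 0;

        let (alpha, f_new) =
            line_search_newton(&mut quadratic, &x, &direction, f_x, &mut nfev, None);

        assert_abs_diff_eq!(alpha, 1.0, epsilon = 1e-12);
        assert_abs_diff_eq!(f_new, 0.0, epsilon = 1e-12);
        assert_eq!(nfev, 1);
    }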

    #[test]
    fn test_newton_cg_rosenbrock() {
        let rosenbrock = |x: &ArrayView1<f64>| -> f64 {
            let a = 1.0;
            let b = 100.0;
            (a - x[0]).powi(2) + b * (x[1] - x[0].powi(2)).powi(2)
        };

        let x0 = Array1::from_vec(vec![0.0, 0.0]);
        let mut options = Options::default();
        options.max_iter = 100; // Newton methods may need more iterations

        let result = minimize_newton_cg(rosenbrock, x0, &options).unwrap();

        assert!(result.success);
        assert_abs_diff_eq!(result.x[0], 1.0, epsilon = 1e-3);
        assert_abs_diff_eq!(result.x[1], 1.0, epsilon = 1e-3);
    }
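
    // A sketch exercising the direction projection: a component that points
    // out of an active lower bound should be zeroed, while a component that
    // stays feasible is left untouched.
    #[test]
    fn test_project_direction_at_lower_bound() {
        let bounds = Bounds::new(&[(Some(0.0), None), (Some(0.0), None)]);

        // x sits exactly on the lower bound in the first coordinate only
        let x = Array1::from_vec(vec![0.0, 1.0]);
        let mut direction = Array1::from_vec(vec![-1.0, -1.0]);

        project_direction(&mut direction, &x, Some(&bounds));

        // The infeasible component is removed; the feasible one survives
        assert_abs_diff_eq!(direction[0], 0.0, epsilon = 1e-12);
        assert_abs_diff_eq!(direction[1], -1.0, epsilon = 1e-12);
    }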
}