fenris_optimize/
newton.rs

1use crate::calculus::{DifferentiableVectorFunction, VectorFunction};
2use fenris_traits::Real;
3use itertools::iterate;
4use log::debug;
5use nalgebra::{DVector, DVectorView, DVectorViewMut, Scalar};
6use numeric_literals::replace_float_literals;
7use std::error::Error;
8use std::fmt;
9use std::fmt::Display;
10
/// The outcome of a successful Newton solve: the final iterate and the
/// number of iterations it took to reach it.
#[derive(Debug, Clone)]
pub struct NewtonResult<T>
where
    T: Scalar,
{
    /// The solution vector `u` at which the iteration terminated.
    pub solution: DVector<T>,
    /// The number of Newton iterations that were performed.
    pub iterations: usize,
}
19
/// Settings that control termination of the Newton iteration.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct NewtonSettings<T> {
    /// Maximum number of iterations to attempt, or `None` for no limit.
    pub max_iterations: Option<usize>,
    /// Convergence tolerance: the iteration stops once `|F(u)|_2 <= tolerance`.
    pub tolerance: T,
}
25
/// Error produced when a Newton iteration fails to converge to a solution.
#[derive(Debug)]
pub enum NewtonError {
    /// The procedure failed because the maximum number of iterations was reached.
    MaximumIterationsReached(usize),
    /// The procedure failed because solving the Jacobian system failed.
    JacobianError(Box<dyn Error>),
    /// The line search failed to produce a valid step direction.
    LineSearchError(Box<dyn Error>),
}
35
36impl Display for NewtonError {
37    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
38        match self {
39            &NewtonError::MaximumIterationsReached(maxit) => {
40                write!(f, "Failed to converge within maximum number of iterations ({}).", maxit)
41            }
42            &NewtonError::JacobianError(ref err) => {
43                write!(f, "Failed to solve Jacobian system. Error: {}", err)
44            }
45            &NewtonError::LineSearchError(ref err) => {
46                write!(f, "Line search failed to produce valid step direction. Error: {}", err)
47            }
48        }
49    }
50}
51
52impl Error for NewtonError {}
53
54/// Attempts to solve the non-linear equation F(u) = 0.
55///
56/// No heap allocation is performed. The solution is said to have converged if
57/// ```|F(u)|_2 <= tolerance```.
58///
59/// If successful, returns the number of iterations performed.
60#[replace_float_literals(T::from_f64(literal).unwrap())]
61pub fn newton<'a, T, F>(
62    function: F,
63    x: impl Into<DVectorViewMut<'a, T>>,
64    f: impl Into<DVectorViewMut<'a, T>>,
65    dx: impl Into<DVectorViewMut<'a, T>>,
66    settings: NewtonSettings<T>,
67) -> Result<usize, NewtonError>
68where
69    T: Real,
70    F: DifferentiableVectorFunction<T>,
71{
72    newton_line_search(function, x, f, dx, settings, &mut NoLineSearch {})
73}
74
75/// Same as `newton`, but allows specifying a line search.
76#[replace_float_literals(T::from_f64(literal).unwrap())]
77pub fn newton_line_search<'a, T, F>(
78    mut function: F,
79    x: impl Into<DVectorViewMut<'a, T>>,
80    f: impl Into<DVectorViewMut<'a, T>>,
81    dx: impl Into<DVectorViewMut<'a, T>>,
82    settings: NewtonSettings<T>,
83    line_search: &mut impl LineSearch<T, F>,
84) -> Result<usize, NewtonError>
85where
86    T: Real,
87    F: DifferentiableVectorFunction<T>,
88{
89    let mut x = x.into();
90    let mut f = f.into();
91    let mut minus_dx = dx.into();
92
93    assert_eq!(x.nrows(), f.nrows());
94    assert_eq!(minus_dx.nrows(), f.nrows());
95
96    function.eval_into(&mut f, &DVectorView::from(&x));
97
98    let mut iter = 0;
99
100    while f.norm() > settings.tolerance {
101        if settings
102            .max_iterations
103            .map(|max_iter| iter == max_iter)
104            .unwrap_or(false)
105        {
106            return Err(NewtonError::MaximumIterationsReached(iter));
107        }
108
109        // Solve the system J dx = -f   <=>   J (-dx) = f
110        let j_result = function.solve_jacobian_system(&mut minus_dx, &DVectorView::from(&x), &DVectorView::from(&f));
111        if let Err(err) = j_result {
112            return Err(NewtonError::JacobianError(err));
113        }
114
115        // Flip sign to make it consistent with line search
116        minus_dx *= -1.0;
117        let dx = &minus_dx;
118
119        let step_length = line_search
120            .step(
121                &mut function,
122                DVectorViewMut::from(&mut f),
123                DVectorViewMut::from(&mut x),
124                DVectorView::from(dx),
125            )
126            .map_err(|err| NewtonError::LineSearchError(err))?;
127        debug!("Newton step length at iter {}: {}", iter, step_length);
128        iter += 1;
129    }
130
131    Ok(iter)
132}
133
/// A line search strategy used by `newton_line_search` to choose a step length.
pub trait LineSearch<T: Scalar, F: VectorFunction<T>> {
    /// Takes a step from `x` along `direction`, updating `x` in place and
    /// re-evaluating the residual into `f` at the new iterate.
    ///
    /// Returns the step length taken along `direction`, or an error if no
    /// acceptable step could be found.
    fn step(
        &mut self,
        function: &mut F,
        f: DVectorViewMut<T>,
        x: DVectorViewMut<T>,
        direction: DVectorView<T>,
    ) -> Result<T, Box<dyn Error>>;
}
143
/// Trivial implementation of line search. Equivalent to a single, full Newton step.
///
/// Its `step` always takes the full step (length one) and never fails.
#[derive(Clone, Debug)]
pub struct NoLineSearch;
147
148impl<T, F> LineSearch<T, F> for NoLineSearch
149where
150    T: Real,
151    F: VectorFunction<T>,
152{
153    #[replace_float_literals(T::from_f64(literal).unwrap())]
154    fn step(
155        &mut self,
156        function: &mut F,
157        mut f: DVectorViewMut<T>,
158        mut x: DVectorViewMut<T>,
159        direction: DVectorView<T>,
160    ) -> Result<T, Box<dyn Error>> {
161        let p = direction;
162        x.axpy(T::one(), &p, T::one());
163        function.eval_into(&mut f, &DVectorView::from(&x));
164        Ok(T::one())
165    }
166}
167
/// Standard backtracking line search using the Armijo condition.
///
/// See Nocedal & Wright (2006), Numerical Optimization, Chapter 3.1.
#[derive(Clone, Debug)]
pub struct BacktrackingLineSearch;
173
174impl<T, F> LineSearch<T, F> for BacktrackingLineSearch
175where
176    T: Real,
177    F: VectorFunction<T>,
178{
179    #[replace_float_literals(T::from_f64(literal).unwrap())]
180    fn step(
181        &mut self,
182        function: &mut F,
183        mut f: DVectorViewMut<T>,
184        mut x: DVectorViewMut<T>,
185        direction: DVectorView<T>,
186    ) -> Result<T, Box<dyn Error>> {
187        // We seek to solve
188        //  F(x) = 0
189        // by minimizing
190        //  g(x) = (1/2) || F(x) ||^2
191        // We have that
192        //  grad g = (grad F) * F,
193        // and the sufficient decrease condition becomes
194        //  g(x_k + alpha * p_k) <= g(x_k) + c * alpha * (grad g)^T * p_k
195        //                       ~= g(x_k) - c * alpha * g(x_k)
196        //                        = (1 - c * alpha) * g(x_k)
197        // where p_k is the step direction, c is a parameter in (0, 1)
198        // and we have assumed that
199        //  grad F^T p_k ~= - F(x_k)
200        // (which it would satisfy if p_k is the exact solution of the Newton step equation).
201
202        // TODO: Is this an OK parameter? Should anyway make it configurable
203        let c = 1e-4;
204        let alpha_min = 1e-6;
205
206        let p = direction;
207        let g_initial = 0.5 * f.magnitude_squared();
208
209        // Start out with some alphas that don't decrease too quickly, then
210        // start decreasing them much faster if the first few iterations don't let us
211        // take a step.
212        let initial_alphas = [0.0, 1.0, 0.75, 0.5];
213        let mut alpha_iter = initial_alphas
214            .iter()
215            .copied()
216            .chain(iterate(0.25, |alpha_i| 0.25 * *alpha_i));
217
218        let mut alpha_prev = alpha_iter.next().unwrap();
219        let mut alpha = alpha_iter.next().unwrap();
220
221        loop {
222            let delta_alpha = alpha - alpha_prev;
223
224            // We have that x^{k + 1} = x^0 + alpha^k * p,
225            // where x^{k+1} is the value of x after taking the step based on the current alpha
226            // parameter. It is straightforward to show that this implies that
227            //  x^{k + 1} = x^k + (alpha^k - alpha^{k - 1}) * p,
228            // which is far more amenable to computation
229            x.axpy(delta_alpha, &p, T::one());
230            function.eval_into(&mut f, &DVectorView::from(&x));
231
232            let g = 0.5 * f.magnitude_squared();
233            if g <= (1.0 - c * alpha) * g_initial {
234                break;
235            } else if alpha < alpha_min {
236                return Err(Box::from(format!(
237                    "Failed to produce valid step direction.\
238                    Alpha {} is smaller than minimum allowed alpha {}.",
239                    alpha, alpha_min
240                )));
241            } else {
242                alpha_prev = alpha;
243                alpha = alpha_iter.next().unwrap();
244            }
245        }
246
247        Ok(alpha)
248    }
249}