//! This crate contains an implementation of
//! [BFGS](https://en.wikipedia.org/w/index.php?title=BFGS_method), an algorithm for minimizing
//! convex, twice-differentiable functions.
//!
//! In this example, we minimize a 2d function:
//!
//! ```rust
//! extern crate bfgs;
//! extern crate ndarray;
//!
//! use ndarray::prelude::*;
//!
//! fn main() {
//!     let x0 = Array::from_vec(vec![8.888, 1.234]);  // Chosen arbitrarily
//!     let f = |x: &Array1<f64>| x.dot(x);
//!     let g = |x: &Array1<f64>| 2.0 * x;
//!     let x_min = bfgs::bfgs(x0, f, g);
//!     assert_eq!(x_min, Ok(Array::from_vec(vec![0.0, 0.0])));
//! }
//! ```
#[cfg(not(test))]
extern crate ndarray;
#[cfg(test)]
#[macro_use(array)]
extern crate ndarray;
#[cfg(test)]
extern crate spectral;

use ndarray::{Array1, Array2};
use std::f64::INFINITY;

const F64_MACHINE_EPSILON: f64 = std::f64::EPSILON;

// From the L-BFGS-B paper (Zhu et al. 1994): 1e7 gives "moderate accuracy," 1e12 "low accuracy,"
// and 10 "high accuracy." If FACTR is 0, the algorithm only stops once the value of f stops
// improving entirely.
const FACTR: f64 = 1e7;

// This is FTOL from Zhu et al.
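// With f64 machine epsilon (~2.2e-16) and FACTR = 1e7, this works out to roughly 2.2e-9.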
const F_TOLERANCE: f64 = FACTR * F64_MACHINE_EPSILON;

// Naive line search: try step sizes from 2^-20 (~1e-6) through 2^19 (~5.2e5) in powers of two and
// return the one that minimizes f. Returns Err(()) if every candidate value of f is NaN or
// +infinity.
fn line_search<F>(f: F) -> Result<f64, ()>
    where
        F: Fn(f64) -> f64,
{
    let mut best_epsilon = 0.0;
    let mut best_val_f = INFINITY;

    for i in -20..20 {
        let epsilon = 2.0_f64.powi(i);
        let val_f = f(epsilon);
        if val_f < best_val_f {
            best_epsilon = epsilon;
            best_val_f = val_f;
        }
    }
    if best_epsilon == 0.0 {
        Err(())
    } else {
        Ok(best_epsilon)
    }
}

fn new_identity_matrix(len: usize) -> Array2<f64> {
    let mut result = Array2::zeros((len, len));
    for z in result.diag_mut() {
        *z = 1.0;
    }
    result
}

// If the improvement in f is not too much bigger than the rounding error, then call it a
// success. This is the first stopping criterion from Zhu et al.
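// Concretely: stop when (f_x_old - f_x) / max(|f_x_old|, |f_x|, 1) <= FACTR * machine epsilon.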
fn stop(f_x_old: f64, f_x: f64) -> bool {
    let negative_delta_f = f_x_old - f_x;
    let denom = f_x_old.abs().max(f_x.abs()).max(1.0);
    negative_delta_f / denom <= F_TOLERANCE
}

/// Returns a value of `x` that should minimize `f`. `f` must be convex and twice-differentiable.
///
/// - `x0` is an initial guess for `x`. Often this is chosen randomly.
/// - `f` is the objective function
/// - `g` is the gradient of `f`
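///
/// Returns `Err(())` if the internal line search fails to find a usable step size.
///
/// A minimal sketch of calling `bfgs` and handling the error case (the starting point and the
/// tolerance checked at the end are arbitrary):
///
/// ```rust
/// extern crate bfgs;
/// extern crate ndarray;
///
/// use ndarray::prelude::*;
///
/// fn main() {
///     let x0 = Array::from_vec(vec![3.0, -4.0]);  // Arbitrary starting point
///     let f = |x: &Array1<f64>| x.dot(x);
///     let g = |x: &Array1<f64>| 2.0 * x;
///     match bfgs::bfgs(x0, f, g) {
///         Ok(x_min) => assert!(x_min.dot(&x_min) < 1e-10),
///         Err(()) => panic!("line search failed to find a step size"),
///     }
/// }
/// ```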
pub fn bfgs<F, G>(x0: Array1<f64>, f: F, g: G) -> Result<Array1<f64>, ()>
    where
        F: Fn(&Array1<f64>) -> f64,
        G: Fn(&Array1<f64>) -> Array1<f64>,
{
    let mut x = x0;
    let mut f_x = f(&x);
    let mut g_x = g(&x);
    let p = x.len();
    assert_eq!(g_x.dim(), x.dim());

    // Initialize the inverse approximate Hessian to the identity matrix
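    // (with the identity, the first iteration is a plain gradient-descent step)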
    let mut b_inv = new_identity_matrix(x.len());

    loop {
        // Find the search direction
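        // (a quasi-Newton step: -B_inv * gradient)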
        let search_dir = -1.0 * b_inv.dot(&g_x);

        // Find a good step size
        let epsilon = line_search(|epsilon| f(&(&search_dir * epsilon + &x)))?;

        // Save the old values
        let f_x_old = f_x;
        let g_x_old = g_x;

        // Take a step in the search direction
        x.scaled_add(epsilon, &search_dir);
        f_x = f(&x);
        g_x = g(&x);

        // Compute deltas between old and new
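        // s = x_new - x_old and y = g(x_new) - g(x_old), reshaped into column vectors so the
        // outer products below can be written as matrix products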
        let y: Array2<f64> = (&g_x - &g_x_old).into_shape((p, 1)).unwrap();
        let s: Array2<f64> = (epsilon * search_dir).into_shape((p, 1)).unwrap();
        let sy: f64 = s.t().dot(&y).into_shape(()).unwrap()[()];
        let ss: Array2<f64> = s.dot(&s.t());

        if stop(f_x_old, f_x) {
            return Ok(x);
        }

        // Update the Hessian approximation
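        // BFGS update of the inverse Hessian approximation (Sherman-Morrison form):
        //   B_inv <- B_inv + (s'y + y' B_inv y) (s s') / (s'y)^2 - (B_inv y s' + s y' B_inv) / (s'y)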
        let to_add: Array2<f64> = ss * (sy + &y.t().dot(&b_inv.dot(&y))) / sy.powi(2);
        let to_sub: Array2<f64> = (b_inv.dot(&y).dot(&s.t()) + s.dot(&y.t().dot(&b_inv))) / sy;
        b_inv = b_inv + to_add - to_sub;
    }
}

#[cfg(test)]
mod tests {
    use ndarray::prelude::*;
    use spectral::prelude::*;
    use super::*;

    // Squared Euclidean distance between two vectors
    fn squared_l2_distance(xs: &Array1<f64>, ys: &Array1<f64>) -> f64 {
        xs.iter().zip(ys.iter()).map(|(x, y)| (y - x).powi(2)).sum()
    }

    #[test]
    fn test_x_squared_1d() {
        let x0 = array![2.0];
        let f = |x: &Array1<f64>| x.iter().map(|xx| xx * xx).sum();
        let g = |x: &Array1<f64>| 2.0 * x;
        let x_min = bfgs(x0, f, g);
        assert_eq!(x_min, Ok(array![0.0]));
    }

    // This should return an error because the function has a maximum instead of a minimum
    #[test]
    fn test_negative_x_squared() {
        let x0 = array![2.0];
        let f = |x: &Array1<f64>| x.iter().map(|xx| -xx * xx).sum();
        let g = |x: &Array1<f64>| -2.0 * x;
        let x_min = bfgs(x0, f, g);
        assert_eq!(x_min, Err(()));
    }

    #[test]
    fn test_x_squared_big_d() {
        let p = 10_000;
        let x0 = Array1::from_elem(p, 2.0);
        let f = |x: &Array1<f64>| x.iter().map(|xx| xx * xx).sum();
        let g = |x: &Array1<f64>| 2.0 * x;
        let x_min = bfgs(x0, f, g);
        assert_eq!(x_min, Ok(Array1::zeros(p)));
    }

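    // The Rosenbrock function is not convex, but its only stationary point is the global
    // minimum at (1, 1).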
    #[test]
    fn test_rosenbrock() {
        let x0 = array![0.0, 0.0];
        let f = |x: &Array1<f64>| (1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2);
        let g = |x: &Array1<f64>| {
            array![
                -400.0 * (x[1] - x[0].powi(2)) * x[0] - 2.0 * (1.0 - x[0]),
                200.0 * (x[1] - x[0].powi(2)),
            ]
        };
        if let Ok(x_min) = bfgs(x0, f, g) {
            assert_that(&squared_l2_distance(&x_min, &array![1.0, 1.0])).is_less_than(&0.01);
        } else {
            panic!("Rosenbrock test failed")
        }
    }
}